| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.011672527248055795, |
| "eval_steps": 500, |
| "global_step": 400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 2.9181318120139488e-05, |
| "grad_norm": 0.5635480880737305, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 0.8507, |
| "step": 1 |
| }, |
| { |
| "epoch": 5.8362636240278976e-05, |
| "grad_norm": 0.33079156279563904, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.1251, |
| "step": 2 |
| }, |
| { |
| "epoch": 8.754395436041846e-05, |
| "grad_norm": 0.3454552888870239, |
| "learning_rate": 1.5e-06, |
| "loss": 0.6399, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00011672527248055795, |
| "grad_norm": 0.4293176829814911, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.5054, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.00014590659060069743, |
| "grad_norm": 0.37919726967811584, |
| "learning_rate": 2.5e-06, |
| "loss": 0.735, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00017508790872083693, |
| "grad_norm": 0.6950544714927673, |
| "learning_rate": 3e-06, |
| "loss": 1.4197, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0002042692268409764, |
| "grad_norm": 0.38271600008010864, |
| "learning_rate": 3.5e-06, |
| "loss": 0.7222, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0002334505449611159, |
| "grad_norm": 0.3510509133338928, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.6049, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0002626318630812554, |
| "grad_norm": 0.29938340187072754, |
| "learning_rate": 4.5e-06, |
| "loss": 0.555, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.00029181318120139485, |
| "grad_norm": 0.38278627395629883, |
| "learning_rate": 5e-06, |
| "loss": 1.7384, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0003209944993215344, |
| "grad_norm": 0.3768065273761749, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 0.4364, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00035017581744167385, |
| "grad_norm": 0.3671921491622925, |
| "learning_rate": 6e-06, |
| "loss": 1.203, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.00037935713556181333, |
| "grad_norm": 0.3327710032463074, |
| "learning_rate": 6.5000000000000004e-06, |
| "loss": 0.5083, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0004085384536819528, |
| "grad_norm": 0.35065436363220215, |
| "learning_rate": 7e-06, |
| "loss": 0.5446, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0004377197718020923, |
| "grad_norm": 0.40824198722839355, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.4587, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0004669010899222318, |
| "grad_norm": 0.34073805809020996, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.5617, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0004960824080423713, |
| "grad_norm": 0.3621309697628021, |
| "learning_rate": 8.5e-06, |
| "loss": 1.1623, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0005252637261625108, |
| "grad_norm": 0.31340083479881287, |
| "learning_rate": 9e-06, |
| "loss": 0.5276, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0005544450442826502, |
| "grad_norm": 0.36106982827186584, |
| "learning_rate": 9.5e-06, |
| "loss": 0.5017, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0005836263624027897, |
| "grad_norm": 0.31271892786026, |
| "learning_rate": 1e-05, |
| "loss": 0.3743, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0006128076805229292, |
| "grad_norm": 0.38480448722839355, |
| "learning_rate": 9.999997874331895e-06, |
| "loss": 1.1777, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0006419889986430688, |
| "grad_norm": 0.5181815028190613, |
| "learning_rate": 9.999991497329387e-06, |
| "loss": 0.8806, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0006711703167632082, |
| "grad_norm": 0.31474944949150085, |
| "learning_rate": 9.9999808689979e-06, |
| "loss": 0.6258, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0007003516348833477, |
| "grad_norm": 0.4090331494808197, |
| "learning_rate": 9.999965989346468e-06, |
| "loss": 0.5284, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0007295329530034872, |
| "grad_norm": 0.38257884979248047, |
| "learning_rate": 9.999946858387744e-06, |
| "loss": 0.8024, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0007587142711236267, |
| "grad_norm": 0.3842026889324188, |
| "learning_rate": 9.999923476137992e-06, |
| "loss": 1.1511, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0007878955892437661, |
| "grad_norm": 0.3617384433746338, |
| "learning_rate": 9.999895842617097e-06, |
| "loss": 0.6226, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0008170769073639056, |
| "grad_norm": 0.36202019453048706, |
| "learning_rate": 9.999863957848556e-06, |
| "loss": 1.1775, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0008462582254840451, |
| "grad_norm": 0.40996360778808594, |
| "learning_rate": 9.999827821859475e-06, |
| "loss": 0.9613, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0008754395436041846, |
| "grad_norm": 0.38747870922088623, |
| "learning_rate": 9.999787434680581e-06, |
| "loss": 0.7427, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0009046208617243241, |
| "grad_norm": 0.3686698377132416, |
| "learning_rate": 9.999742796346215e-06, |
| "loss": 1.8661, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.0009338021798444636, |
| "grad_norm": 0.4100247323513031, |
| "learning_rate": 9.99969390689433e-06, |
| "loss": 0.7473, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0009629834979646031, |
| "grad_norm": 0.3075491189956665, |
| "learning_rate": 9.999640766366496e-06, |
| "loss": 0.352, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.0009921648160847426, |
| "grad_norm": 0.3251459002494812, |
| "learning_rate": 9.999583374807895e-06, |
| "loss": 1.0352, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.001021346134204882, |
| "grad_norm": 0.31156864762306213, |
| "learning_rate": 9.999521732267327e-06, |
| "loss": 0.4442, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0010505274523250215, |
| "grad_norm": 0.7785539627075195, |
| "learning_rate": 9.999455838797207e-06, |
| "loss": 1.0465, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.001079708770445161, |
| "grad_norm": 0.3474777936935425, |
| "learning_rate": 9.999385694453557e-06, |
| "loss": 0.5922, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.0011088900885653005, |
| "grad_norm": 0.3285025656223297, |
| "learning_rate": 9.99931129929602e-06, |
| "loss": 0.6125, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.00113807140668544, |
| "grad_norm": 0.31099236011505127, |
| "learning_rate": 9.999232653387854e-06, |
| "loss": 0.8929, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0011672527248055794, |
| "grad_norm": 0.9824883937835693, |
| "learning_rate": 9.999149756795927e-06, |
| "loss": 1.0796, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0011964340429257189, |
| "grad_norm": 0.37850892543792725, |
| "learning_rate": 9.999062609590723e-06, |
| "loss": 1.1134, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.0012256153610458584, |
| "grad_norm": 0.35737380385398865, |
| "learning_rate": 9.998971211846343e-06, |
| "loss": 1.9588, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0012547966791659978, |
| "grad_norm": 0.5358197689056396, |
| "learning_rate": 9.998875563640495e-06, |
| "loss": 0.5388, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.0012839779972861375, |
| "grad_norm": 0.3402862846851349, |
| "learning_rate": 9.99877566505451e-06, |
| "loss": 0.5397, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.001313159315406277, |
| "grad_norm": 0.36641523241996765, |
| "learning_rate": 9.998671516173327e-06, |
| "loss": 0.5417, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0013423406335264165, |
| "grad_norm": 0.3002683222293854, |
| "learning_rate": 9.9985631170855e-06, |
| "loss": 0.5729, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.001371521951646556, |
| "grad_norm": 0.5055456161499023, |
| "learning_rate": 9.998450467883196e-06, |
| "loss": 0.5473, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0014007032697666954, |
| "grad_norm": 0.4431580603122711, |
| "learning_rate": 9.998333568662199e-06, |
| "loss": 1.6627, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0014298845878868349, |
| "grad_norm": 0.31471818685531616, |
| "learning_rate": 9.998212419521905e-06, |
| "loss": 0.359, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.0014590659060069744, |
| "grad_norm": 0.4176453649997711, |
| "learning_rate": 9.998087020565319e-06, |
| "loss": 0.9098, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0014882472241271138, |
| "grad_norm": 0.3488178253173828, |
| "learning_rate": 9.997957371899069e-06, |
| "loss": 0.5606, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.0015174285422472533, |
| "grad_norm": 0.3195094168186188, |
| "learning_rate": 9.997823473633388e-06, |
| "loss": 0.5484, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0015466098603673928, |
| "grad_norm": 0.3707164525985718, |
| "learning_rate": 9.997685325882125e-06, |
| "loss": 1.0832, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.0015757911784875323, |
| "grad_norm": 0.5281696319580078, |
| "learning_rate": 9.997542928762745e-06, |
| "loss": 0.4327, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.0016049724966076717, |
| "grad_norm": 0.38910332322120667, |
| "learning_rate": 9.997396282396322e-06, |
| "loss": 0.563, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0016341538147278112, |
| "grad_norm": 0.5761558413505554, |
| "learning_rate": 9.997245386907541e-06, |
| "loss": 1.533, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0016633351328479507, |
| "grad_norm": 0.3254339098930359, |
| "learning_rate": 9.997090242424711e-06, |
| "loss": 0.4337, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.0016925164509680902, |
| "grad_norm": 0.5125793814659119, |
| "learning_rate": 9.996930849079741e-06, |
| "loss": 0.8955, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.0017216977690882296, |
| "grad_norm": 0.4197414517402649, |
| "learning_rate": 9.99676720700816e-06, |
| "loss": 2.1629, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.001750879087208369, |
| "grad_norm": 0.4802038371562958, |
| "learning_rate": 9.996599316349105e-06, |
| "loss": 0.43, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0017800604053285088, |
| "grad_norm": 0.30398809909820557, |
| "learning_rate": 9.99642717724533e-06, |
| "loss": 1.0333, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.0018092417234486483, |
| "grad_norm": 0.29787677526474, |
| "learning_rate": 9.996250789843203e-06, |
| "loss": 0.4196, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.0018384230415687877, |
| "grad_norm": 0.5412101745605469, |
| "learning_rate": 9.996070154292691e-06, |
| "loss": 0.9048, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.0018676043596889272, |
| "grad_norm": 0.6284111738204956, |
| "learning_rate": 9.995885270747393e-06, |
| "loss": 0.4706, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.0018967856778090667, |
| "grad_norm": 0.43187353014945984, |
| "learning_rate": 9.9956961393645e-06, |
| "loss": 1.2573, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.0019259669959292062, |
| "grad_norm": 0.3548312485218048, |
| "learning_rate": 9.995502760304829e-06, |
| "loss": 0.6652, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.0019551483140493454, |
| "grad_norm": 0.3329358398914337, |
| "learning_rate": 9.995305133732805e-06, |
| "loss": 0.4759, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.001984329632169485, |
| "grad_norm": 0.3695685565471649, |
| "learning_rate": 9.99510325981646e-06, |
| "loss": 1.257, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.0020135109502896244, |
| "grad_norm": 0.3101709485054016, |
| "learning_rate": 9.994897138727446e-06, |
| "loss": 0.5615, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.002042692268409764, |
| "grad_norm": 0.5336629152297974, |
| "learning_rate": 9.994686770641015e-06, |
| "loss": 0.8056, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0020718735865299038, |
| "grad_norm": 0.31102678179740906, |
| "learning_rate": 9.994472155736039e-06, |
| "loss": 1.0285, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.002101054904650043, |
| "grad_norm": 0.31015750765800476, |
| "learning_rate": 9.994253294194998e-06, |
| "loss": 0.6796, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.0021302362227701827, |
| "grad_norm": 1.129408597946167, |
| "learning_rate": 9.994030186203983e-06, |
| "loss": 0.9488, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.002159417540890322, |
| "grad_norm": 0.3322198987007141, |
| "learning_rate": 9.993802831952692e-06, |
| "loss": 1.6218, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.0021885988590104617, |
| "grad_norm": 0.36219432950019836, |
| "learning_rate": 9.993571231634444e-06, |
| "loss": 0.4795, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.002217780177130601, |
| "grad_norm": 0.3242811858654022, |
| "learning_rate": 9.993335385446155e-06, |
| "loss": 0.4375, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.0022469614952507406, |
| "grad_norm": 0.5745208859443665, |
| "learning_rate": 9.993095293588359e-06, |
| "loss": 1.3936, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.00227614281337088, |
| "grad_norm": 0.3800385594367981, |
| "learning_rate": 9.992850956265198e-06, |
| "loss": 0.5863, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0023053241314910196, |
| "grad_norm": 0.38720518350601196, |
| "learning_rate": 9.992602373684426e-06, |
| "loss": 0.6313, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.002334505449611159, |
| "grad_norm": 0.5431109070777893, |
| "learning_rate": 9.992349546057403e-06, |
| "loss": 0.7509, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0023636867677312985, |
| "grad_norm": 0.36960089206695557, |
| "learning_rate": 9.9920924735991e-06, |
| "loss": 0.9468, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.0023928680858514378, |
| "grad_norm": 0.3455142080783844, |
| "learning_rate": 9.991831156528095e-06, |
| "loss": 0.6766, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.0024220494039715774, |
| "grad_norm": 0.288099080324173, |
| "learning_rate": 9.991565595066582e-06, |
| "loss": 0.3018, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.0024512307220917167, |
| "grad_norm": 0.3686278164386749, |
| "learning_rate": 9.991295789440357e-06, |
| "loss": 0.6696, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.0024804120402118564, |
| "grad_norm": 0.28621384501457214, |
| "learning_rate": 9.991021739878828e-06, |
| "loss": 0.4126, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.0025095933583319957, |
| "grad_norm": 0.33376601338386536, |
| "learning_rate": 9.990743446615008e-06, |
| "loss": 0.6864, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.0025387746764521353, |
| "grad_norm": 0.36860471963882446, |
| "learning_rate": 9.990460909885522e-06, |
| "loss": 0.5158, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.002567955994572275, |
| "grad_norm": 0.4445946216583252, |
| "learning_rate": 9.9901741299306e-06, |
| "loss": 1.2633, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.0025971373126924143, |
| "grad_norm": 0.29571008682250977, |
| "learning_rate": 9.989883106994086e-06, |
| "loss": 0.9292, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.002626318630812554, |
| "grad_norm": 0.3927260935306549, |
| "learning_rate": 9.989587841323423e-06, |
| "loss": 1.0559, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0026554999489326932, |
| "grad_norm": 0.36762961745262146, |
| "learning_rate": 9.98928833316967e-06, |
| "loss": 0.913, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.002684681267052833, |
| "grad_norm": 0.3317353427410126, |
| "learning_rate": 9.988984582787482e-06, |
| "loss": 0.6172, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.002713862585172972, |
| "grad_norm": 0.465626984834671, |
| "learning_rate": 9.988676590435133e-06, |
| "loss": 0.7472, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.002743043903293112, |
| "grad_norm": 1.3269602060317993, |
| "learning_rate": 9.9883643563745e-06, |
| "loss": 0.6623, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.002772225221413251, |
| "grad_norm": 0.4574294984340668, |
| "learning_rate": 9.988047880871063e-06, |
| "loss": 1.3339, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.002801406539533391, |
| "grad_norm": 0.34987109899520874, |
| "learning_rate": 9.98772716419391e-06, |
| "loss": 0.6319, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.00283058785765353, |
| "grad_norm": 0.402045875787735, |
| "learning_rate": 9.98740220661574e-06, |
| "loss": 0.6483, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.0028597691757736698, |
| "grad_norm": 0.29255297780036926, |
| "learning_rate": 9.987073008412847e-06, |
| "loss": 0.5413, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.002888950493893809, |
| "grad_norm": 0.3598422110080719, |
| "learning_rate": 9.986739569865143e-06, |
| "loss": 0.595, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.0029181318120139487, |
| "grad_norm": 0.38826480507850647, |
| "learning_rate": 9.986401891256139e-06, |
| "loss": 0.6546, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.002947313130134088, |
| "grad_norm": 0.42309150099754333, |
| "learning_rate": 9.98605997287295e-06, |
| "loss": 1.0045, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.0029764944482542277, |
| "grad_norm": 0.4501197338104248, |
| "learning_rate": 9.9857138150063e-06, |
| "loss": 0.8104, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.003005675766374367, |
| "grad_norm": 0.3553255796432495, |
| "learning_rate": 9.985363417950515e-06, |
| "loss": 0.6416, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.0030348570844945066, |
| "grad_norm": 0.30859172344207764, |
| "learning_rate": 9.985008782003524e-06, |
| "loss": 0.5201, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.0030640384026146463, |
| "grad_norm": 0.3903139531612396, |
| "learning_rate": 9.984649907466868e-06, |
| "loss": 0.8591, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.0030932197207347856, |
| "grad_norm": 0.3806699514389038, |
| "learning_rate": 9.98428679464568e-06, |
| "loss": 0.6575, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.0031224010388549253, |
| "grad_norm": 0.7828800082206726, |
| "learning_rate": 9.983919443848706e-06, |
| "loss": 0.63, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.0031515823569750645, |
| "grad_norm": 0.32453709840774536, |
| "learning_rate": 9.98354785538829e-06, |
| "loss": 0.489, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.003180763675095204, |
| "grad_norm": 0.4170776307582855, |
| "learning_rate": 9.983172029580387e-06, |
| "loss": 1.1076, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.0032099449932153435, |
| "grad_norm": 0.3523752689361572, |
| "learning_rate": 9.982791966744545e-06, |
| "loss": 0.6405, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.003239126311335483, |
| "grad_norm": 0.32706937193870544, |
| "learning_rate": 9.98240766720392e-06, |
| "loss": 1.0937, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.0032683076294556224, |
| "grad_norm": 0.3406533896923065, |
| "learning_rate": 9.982019131285268e-06, |
| "loss": 1.3389, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.003297488947575762, |
| "grad_norm": 0.376828134059906, |
| "learning_rate": 9.98162635931895e-06, |
| "loss": 0.636, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.0033266702656959014, |
| "grad_norm": 0.35637038946151733, |
| "learning_rate": 9.981229351638926e-06, |
| "loss": 0.4319, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.003355851583816041, |
| "grad_norm": 0.5207456350326538, |
| "learning_rate": 9.980828108582759e-06, |
| "loss": 0.6011, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0033850329019361803, |
| "grad_norm": 0.464778333902359, |
| "learning_rate": 9.980422630491614e-06, |
| "loss": 0.7913, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.00341421422005632, |
| "grad_norm": 0.3464951515197754, |
| "learning_rate": 9.980012917710254e-06, |
| "loss": 0.5774, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.0034433955381764593, |
| "grad_norm": 0.36604249477386475, |
| "learning_rate": 9.979598970587046e-06, |
| "loss": 0.7515, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.003472576856296599, |
| "grad_norm": 0.35948342084884644, |
| "learning_rate": 9.979180789473955e-06, |
| "loss": 0.4906, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.003501758174416738, |
| "grad_norm": 0.3790506422519684, |
| "learning_rate": 9.978758374726544e-06, |
| "loss": 0.7257, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.003530939492536878, |
| "grad_norm": 0.36446383595466614, |
| "learning_rate": 9.978331726703984e-06, |
| "loss": 0.6115, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.0035601208106570176, |
| "grad_norm": 0.4974438548088074, |
| "learning_rate": 9.977900845769037e-06, |
| "loss": 0.6018, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.003589302128777157, |
| "grad_norm": 0.4783862233161926, |
| "learning_rate": 9.977465732288065e-06, |
| "loss": 0.7256, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.0036184834468972965, |
| "grad_norm": 0.35444504022598267, |
| "learning_rate": 9.977026386631032e-06, |
| "loss": 0.5428, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.003647664765017436, |
| "grad_norm": 0.44525983929634094, |
| "learning_rate": 9.9765828091715e-06, |
| "loss": 0.7336, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.0036768460831375755, |
| "grad_norm": 0.32913491129875183, |
| "learning_rate": 9.97613500028663e-06, |
| "loss": 0.6213, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.0037060274012577147, |
| "grad_norm": 0.3486779034137726, |
| "learning_rate": 9.975682960357176e-06, |
| "loss": 1.1807, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.0037352087193778544, |
| "grad_norm": 0.2928440570831299, |
| "learning_rate": 9.975226689767494e-06, |
| "loss": 0.5046, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.0037643900374979937, |
| "grad_norm": 0.3638307750225067, |
| "learning_rate": 9.974766188905535e-06, |
| "loss": 1.2703, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.0037935713556181334, |
| "grad_norm": 0.478950560092926, |
| "learning_rate": 9.97430145816285e-06, |
| "loss": 1.1034, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0038227526737382726, |
| "grad_norm": 0.5774679183959961, |
| "learning_rate": 9.973832497934583e-06, |
| "loss": 0.5785, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.0038519339918584123, |
| "grad_norm": 0.3301682770252228, |
| "learning_rate": 9.973359308619476e-06, |
| "loss": 0.6012, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.0038811153099785516, |
| "grad_norm": 0.4451266825199127, |
| "learning_rate": 9.972881890619865e-06, |
| "loss": 0.6879, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.003910296628098691, |
| "grad_norm": 0.6361525654792786, |
| "learning_rate": 9.972400244341685e-06, |
| "loss": 0.8636, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.003939477946218831, |
| "grad_norm": 0.3009544909000397, |
| "learning_rate": 9.971914370194462e-06, |
| "loss": 0.5197, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.00396865926433897, |
| "grad_norm": 0.36018285155296326, |
| "learning_rate": 9.97142426859132e-06, |
| "loss": 0.6695, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.0039978405824591095, |
| "grad_norm": 0.4810916781425476, |
| "learning_rate": 9.970929939948978e-06, |
| "loss": 0.9842, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.004027021900579249, |
| "grad_norm": 0.3791263699531555, |
| "learning_rate": 9.970431384687741e-06, |
| "loss": 0.6019, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.004056203218699389, |
| "grad_norm": 0.35318222641944885, |
| "learning_rate": 9.969928603231523e-06, |
| "loss": 1.1406, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.004085384536819528, |
| "grad_norm": 0.42550453543663025, |
| "learning_rate": 9.969421596007817e-06, |
| "loss": 0.7477, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.004114565854939667, |
| "grad_norm": 0.4141107201576233, |
| "learning_rate": 9.968910363447715e-06, |
| "loss": 1.1222, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.0041437471730598075, |
| "grad_norm": 0.2917640507221222, |
| "learning_rate": 9.968394905985905e-06, |
| "loss": 0.3913, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.004172928491179947, |
| "grad_norm": 0.38363099098205566, |
| "learning_rate": 9.967875224060658e-06, |
| "loss": 1.0972, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.004202109809300086, |
| "grad_norm": 0.5850480794906616, |
| "learning_rate": 9.967351318113847e-06, |
| "loss": 0.7765, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.004231291127420225, |
| "grad_norm": 0.31228914856910706, |
| "learning_rate": 9.96682318859093e-06, |
| "loss": 0.4522, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.004260472445540365, |
| "grad_norm": 0.45077505707740784, |
| "learning_rate": 9.96629083594096e-06, |
| "loss": 0.7652, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.004289653763660505, |
| "grad_norm": 0.32041504979133606, |
| "learning_rate": 9.965754260616576e-06, |
| "loss": 0.5541, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.004318835081780644, |
| "grad_norm": 0.34780099987983704, |
| "learning_rate": 9.965213463074013e-06, |
| "loss": 0.8193, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.004348016399900783, |
| "grad_norm": 0.36673223972320557, |
| "learning_rate": 9.964668443773094e-06, |
| "loss": 1.1096, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.004377197718020923, |
| "grad_norm": 0.4030401110649109, |
| "learning_rate": 9.964119203177228e-06, |
| "loss": 0.7801, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0044063790361410626, |
| "grad_norm": 0.5267347693443298, |
| "learning_rate": 9.963565741753418e-06, |
| "loss": 0.6537, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.004435560354261202, |
| "grad_norm": 0.6500905156135559, |
| "learning_rate": 9.963008059972255e-06, |
| "loss": 0.6598, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.004464741672381341, |
| "grad_norm": 0.44937339425086975, |
| "learning_rate": 9.962446158307914e-06, |
| "loss": 0.9024, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.004493922990501481, |
| "grad_norm": 0.47618094086647034, |
| "learning_rate": 9.961880037238168e-06, |
| "loss": 0.6525, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0045231043086216205, |
| "grad_norm": 0.811037003993988, |
| "learning_rate": 9.961309697244366e-06, |
| "loss": 1.2114, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.00455228562674176, |
| "grad_norm": 0.30810266733169556, |
| "learning_rate": 9.960735138811451e-06, |
| "loss": 0.6901, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.0045814669448619, |
| "grad_norm": 0.4555955231189728, |
| "learning_rate": 9.960156362427949e-06, |
| "loss": 0.822, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.004610648262982039, |
| "grad_norm": 0.3224095106124878, |
| "learning_rate": 9.959573368585979e-06, |
| "loss": 0.5978, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.004639829581102178, |
| "grad_norm": 0.37293335795402527, |
| "learning_rate": 9.95898615778124e-06, |
| "loss": 0.6142, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.004669010899222318, |
| "grad_norm": 0.44633975625038147, |
| "learning_rate": 9.958394730513014e-06, |
| "loss": 0.5834, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.004698192217342458, |
| "grad_norm": 0.3741106688976288, |
| "learning_rate": 9.957799087284177e-06, |
| "loss": 0.7021, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.004727373535462597, |
| "grad_norm": 0.35581302642822266, |
| "learning_rate": 9.957199228601183e-06, |
| "loss": 0.7703, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.004756554853582736, |
| "grad_norm": 0.3660070300102234, |
| "learning_rate": 9.956595154974073e-06, |
| "loss": 1.0133, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.0047857361717028755, |
| "grad_norm": 0.35619139671325684, |
| "learning_rate": 9.955986866916472e-06, |
| "loss": 0.5734, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.004814917489823016, |
| "grad_norm": 0.3273680806159973, |
| "learning_rate": 9.955374364945585e-06, |
| "loss": 1.1949, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.004844098807943155, |
| "grad_norm": 0.3657272458076477, |
| "learning_rate": 9.954757649582202e-06, |
| "loss": 0.4649, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.004873280126063294, |
| "grad_norm": 0.3416500985622406, |
| "learning_rate": 9.9541367213507e-06, |
| "loss": 0.817, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.004902461444183433, |
| "grad_norm": 0.3144441545009613, |
| "learning_rate": 9.95351158077903e-06, |
| "loss": 0.6406, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.0049316427623035735, |
| "grad_norm": 0.4005574584007263, |
| "learning_rate": 9.952882228398731e-06, |
| "loss": 0.7143, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.004960824080423713, |
| "grad_norm": 0.45650023221969604, |
| "learning_rate": 9.952248664744919e-06, |
| "loss": 0.565, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.004990005398543852, |
| "grad_norm": 0.3257487416267395, |
| "learning_rate": 9.951610890356291e-06, |
| "loss": 0.5586, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.005019186716663991, |
| "grad_norm": 0.3424636721611023, |
| "learning_rate": 9.95096890577513e-06, |
| "loss": 1.0405, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.005048368034784131, |
| "grad_norm": 0.45603424310684204, |
| "learning_rate": 9.950322711547292e-06, |
| "loss": 0.7305, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.005077549352904271, |
| "grad_norm": 0.4074293375015259, |
| "learning_rate": 9.949672308222214e-06, |
| "loss": 0.5625, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.00510673067102441, |
| "grad_norm": 0.40585842728614807, |
| "learning_rate": 9.949017696352914e-06, |
| "loss": 0.8139, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.00513591198914455, |
| "grad_norm": 0.3102453351020813, |
| "learning_rate": 9.948358876495985e-06, |
| "loss": 0.7125, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.005165093307264689, |
| "grad_norm": 0.3370908796787262, |
| "learning_rate": 9.947695849211603e-06, |
| "loss": 0.5844, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.005194274625384829, |
| "grad_norm": 0.3051895499229431, |
| "learning_rate": 9.947028615063515e-06, |
| "loss": 0.5174, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.005223455943504968, |
| "grad_norm": 0.3698843717575073, |
| "learning_rate": 9.946357174619052e-06, |
| "loss": 0.4539, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.005252637261625108, |
| "grad_norm": 0.29240456223487854, |
| "learning_rate": 9.945681528449116e-06, |
| "loss": 0.5571, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.005281818579745247, |
| "grad_norm": 0.3231453001499176, |
| "learning_rate": 9.945001677128185e-06, |
| "loss": 1.1014, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.0053109998978653865, |
| "grad_norm": 0.3621669113636017, |
| "learning_rate": 9.944317621234318e-06, |
| "loss": 0.6259, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.005340181215985526, |
| "grad_norm": 0.3420839011669159, |
| "learning_rate": 9.943629361349143e-06, |
| "loss": 0.6329, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.005369362534105666, |
| "grad_norm": 0.45090600848197937, |
| "learning_rate": 9.942936898057866e-06, |
| "loss": 1.0192, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.005398543852225805, |
| "grad_norm": 0.32308369874954224, |
| "learning_rate": 9.942240231949263e-06, |
| "loss": 0.5465, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.005427725170345944, |
| "grad_norm": 0.34121567010879517, |
| "learning_rate": 9.94153936361569e-06, |
| "loss": 0.6829, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.005456906488466084, |
| "grad_norm": 0.3925750255584717, |
| "learning_rate": 9.940834293653071e-06, |
| "loss": 1.0301, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.005486087806586224, |
| "grad_norm": 0.40466272830963135, |
| "learning_rate": 9.940125022660903e-06, |
| "loss": 0.8774, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.005515269124706363, |
| "grad_norm": 0.3522239625453949, |
| "learning_rate": 9.939411551242258e-06, |
| "loss": 0.7484, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.005544450442826502, |
| "grad_norm": 0.28628867864608765, |
| "learning_rate": 9.938693880003775e-06, |
| "loss": 0.7138, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0055736317609466415, |
| "grad_norm": 0.3318323791027069, |
| "learning_rate": 9.937972009555667e-06, |
| "loss": 0.514, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.005602813079066782, |
| "grad_norm": 0.34792056679725647, |
| "learning_rate": 9.937245940511719e-06, |
| "loss": 0.5826, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.005631994397186921, |
| "grad_norm": 0.3346732556819916, |
| "learning_rate": 9.93651567348928e-06, |
| "loss": 0.4349, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.00566117571530706, |
| "grad_norm": 0.37206366658210754, |
| "learning_rate": 9.935781209109274e-06, |
| "loss": 1.2471, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.0056903570334272, |
| "grad_norm": 0.4244491755962372, |
| "learning_rate": 9.935042547996194e-06, |
| "loss": 0.9581, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.0057195383515473396, |
| "grad_norm": 0.35628461837768555, |
| "learning_rate": 9.934299690778096e-06, |
| "loss": 0.4569, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.005748719669667479, |
| "grad_norm": 0.32731014490127563, |
| "learning_rate": 9.933552638086607e-06, |
| "loss": 0.5726, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.005777900987787618, |
| "grad_norm": 0.3174387514591217, |
| "learning_rate": 9.932801390556926e-06, |
| "loss": 1.3027, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.005807082305907758, |
| "grad_norm": 0.3262479305267334, |
| "learning_rate": 9.932045948827809e-06, |
| "loss": 1.1214, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.0058362636240278975, |
| "grad_norm": 0.3247474431991577, |
| "learning_rate": 9.931286313541586e-06, |
| "loss": 0.4761, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.005865444942148037, |
| "grad_norm": 0.38119345903396606, |
| "learning_rate": 9.930522485344149e-06, |
| "loss": 0.882, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.005894626260268176, |
| "grad_norm": 0.29767370223999023, |
| "learning_rate": 9.929754464884958e-06, |
| "loss": 0.6177, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.005923807578388316, |
| "grad_norm": 0.4254716634750366, |
| "learning_rate": 9.928982252817032e-06, |
| "loss": 0.6291, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.005952988896508455, |
| "grad_norm": 0.39530616998672485, |
| "learning_rate": 9.928205849796963e-06, |
| "loss": 0.9351, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.005982170214628595, |
| "grad_norm": 0.6373737454414368, |
| "learning_rate": 9.927425256484894e-06, |
| "loss": 0.6, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.006011351532748734, |
| "grad_norm": 0.37412020564079285, |
| "learning_rate": 9.926640473544545e-06, |
| "loss": 0.6374, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.006040532850868874, |
| "grad_norm": 0.33179807662963867, |
| "learning_rate": 9.925851501643186e-06, |
| "loss": 0.715, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.006069714168989013, |
| "grad_norm": 0.3322802186012268, |
| "learning_rate": 9.925058341451659e-06, |
| "loss": 0.6499, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.0060988954871091525, |
| "grad_norm": 0.34102705121040344, |
| "learning_rate": 9.924260993644357e-06, |
| "loss": 0.4996, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.006128076805229293, |
| "grad_norm": 0.3628920614719391, |
| "learning_rate": 9.92345945889924e-06, |
| "loss": 1.0272, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.006157258123349432, |
| "grad_norm": 0.3197779357433319, |
| "learning_rate": 9.92265373789783e-06, |
| "loss": 0.4657, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.006186439441469571, |
| "grad_norm": 0.3385450839996338, |
| "learning_rate": 9.9218438313252e-06, |
| "loss": 0.5419, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.00621562075958971, |
| "grad_norm": 0.6793949604034424, |
| "learning_rate": 9.921029739869993e-06, |
| "loss": 1.5859, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.0062448020777098505, |
| "grad_norm": 0.3696063756942749, |
| "learning_rate": 9.920211464224398e-06, |
| "loss": 0.5275, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.00627398339582999, |
| "grad_norm": 0.3089805841445923, |
| "learning_rate": 9.919389005084173e-06, |
| "loss": 1.0725, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.006303164713950129, |
| "grad_norm": 0.35823842883110046, |
| "learning_rate": 9.918562363148625e-06, |
| "loss": 0.58, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.006332346032070268, |
| "grad_norm": 0.35605373978614807, |
| "learning_rate": 9.917731539120623e-06, |
| "loss": 0.8045, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.006361527350190408, |
| "grad_norm": 0.9635096192359924, |
| "learning_rate": 9.916896533706587e-06, |
| "loss": 0.6127, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.006390708668310548, |
| "grad_norm": 0.44269976019859314, |
| "learning_rate": 9.916057347616496e-06, |
| "loss": 1.1253, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.006419889986430687, |
| "grad_norm": 0.35200875997543335, |
| "learning_rate": 9.915213981563882e-06, |
| "loss": 0.7173, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.006449071304550826, |
| "grad_norm": 0.3846692442893982, |
| "learning_rate": 9.914366436265834e-06, |
| "loss": 0.625, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.006478252622670966, |
| "grad_norm": 1.3158338069915771, |
| "learning_rate": 9.913514712442987e-06, |
| "loss": 0.4931, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.006507433940791106, |
| "grad_norm": 0.3677213191986084, |
| "learning_rate": 9.912658810819537e-06, |
| "loss": 0.6044, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.006536615258911245, |
| "grad_norm": 0.37323689460754395, |
| "learning_rate": 9.911798732123231e-06, |
| "loss": 0.8516, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.006565796577031384, |
| "grad_norm": 0.41386744379997253, |
| "learning_rate": 9.910934477085363e-06, |
| "loss": 0.633, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.006594977895151524, |
| "grad_norm": 0.3256453573703766, |
| "learning_rate": 9.91006604644078e-06, |
| "loss": 0.5754, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.0066241592132716635, |
| "grad_norm": 0.42724573612213135, |
| "learning_rate": 9.909193440927882e-06, |
| "loss": 0.9702, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.006653340531391803, |
| "grad_norm": 0.30440038442611694, |
| "learning_rate": 9.908316661288617e-06, |
| "loss": 0.6389, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.006682521849511943, |
| "grad_norm": 0.3871992528438568, |
| "learning_rate": 9.907435708268483e-06, |
| "loss": 1.7041, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.006711703167632082, |
| "grad_norm": 0.3268488049507141, |
| "learning_rate": 9.906550582616521e-06, |
| "loss": 1.0783, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.006740884485752221, |
| "grad_norm": 0.3559863865375519, |
| "learning_rate": 9.90566128508533e-06, |
| "loss": 1.0627, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.006770065803872361, |
| "grad_norm": 0.3503190577030182, |
| "learning_rate": 9.904767816431043e-06, |
| "loss": 1.1182, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.006799247121992501, |
| "grad_norm": 0.41551750898361206, |
| "learning_rate": 9.903870177413354e-06, |
| "loss": 1.0803, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.00682842844011264, |
| "grad_norm": 0.32648736238479614, |
| "learning_rate": 9.902968368795496e-06, |
| "loss": 0.4153, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.006857609758232779, |
| "grad_norm": 0.5350513458251953, |
| "learning_rate": 9.902062391344245e-06, |
| "loss": 1.5776, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.0068867910763529185, |
| "grad_norm": 0.4450839161872864, |
| "learning_rate": 9.901152245829922e-06, |
| "loss": 0.4149, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.006915972394473059, |
| "grad_norm": 0.31524381041526794, |
| "learning_rate": 9.900237933026397e-06, |
| "loss": 0.5053, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.006945153712593198, |
| "grad_norm": 0.36861109733581543, |
| "learning_rate": 9.899319453711081e-06, |
| "loss": 1.1878, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.006974335030713337, |
| "grad_norm": 0.43096283078193665, |
| "learning_rate": 9.898396808664924e-06, |
| "loss": 0.827, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.007003516348833476, |
| "grad_norm": 0.4168417453765869, |
| "learning_rate": 9.89746999867242e-06, |
| "loss": 1.0594, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.0070326976669536165, |
| "grad_norm": 0.7598991394042969, |
| "learning_rate": 9.89653902452161e-06, |
| "loss": 1.001, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.007061878985073756, |
| "grad_norm": 0.4895627796649933, |
| "learning_rate": 9.895603887004068e-06, |
| "loss": 1.327, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.007091060303193895, |
| "grad_norm": 0.5062536597251892, |
| "learning_rate": 9.894664586914911e-06, |
| "loss": 0.9793, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.007120241621314035, |
| "grad_norm": 0.3142737150192261, |
| "learning_rate": 9.893721125052794e-06, |
| "loss": 0.6362, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.0071494229394341744, |
| "grad_norm": 0.316954642534256, |
| "learning_rate": 9.892773502219913e-06, |
| "loss": 0.3909, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.007178604257554314, |
| "grad_norm": 0.3828858733177185, |
| "learning_rate": 9.891821719222e-06, |
| "loss": 0.529, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.007207785575674453, |
| "grad_norm": 0.35143372416496277, |
| "learning_rate": 9.890865776868324e-06, |
| "loss": 1.0562, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.007236966893794593, |
| "grad_norm": 0.4023430347442627, |
| "learning_rate": 9.889905675971694e-06, |
| "loss": 0.729, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.007266148211914732, |
| "grad_norm": 0.36702919006347656, |
| "learning_rate": 9.888941417348453e-06, |
| "loss": 1.1581, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.007295329530034872, |
| "grad_norm": 0.36125248670578003, |
| "learning_rate": 9.887973001818473e-06, |
| "loss": 0.5188, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.007324510848155011, |
| "grad_norm": 0.32154127955436707, |
| "learning_rate": 9.887000430205173e-06, |
| "loss": 0.5811, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.007353692166275151, |
| "grad_norm": 0.3822373151779175, |
| "learning_rate": 9.886023703335493e-06, |
| "loss": 0.69, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.00738287348439529, |
| "grad_norm": 0.4168561100959778, |
| "learning_rate": 9.885042822039915e-06, |
| "loss": 0.9765, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.0074120548025154295, |
| "grad_norm": 0.4219783842563629, |
| "learning_rate": 9.884057787152451e-06, |
| "loss": 1.3698, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.007441236120635569, |
| "grad_norm": 0.32248085737228394, |
| "learning_rate": 9.88306859951064e-06, |
| "loss": 0.4901, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.007470417438755709, |
| "grad_norm": 0.4942518174648285, |
| "learning_rate": 9.88207525995556e-06, |
| "loss": 0.7452, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.007499598756875848, |
| "grad_norm": 0.32152867317199707, |
| "learning_rate": 9.881077769331811e-06, |
| "loss": 0.6269, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.007528780074995987, |
| "grad_norm": 0.4080241322517395, |
| "learning_rate": 9.88007612848753e-06, |
| "loss": 0.9697, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.007557961393116127, |
| "grad_norm": 0.40665292739868164, |
| "learning_rate": 9.879070338274379e-06, |
| "loss": 0.6378, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.007587142711236267, |
| "grad_norm": 0.43658050894737244, |
| "learning_rate": 9.878060399547547e-06, |
| "loss": 1.5812, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.007616324029356406, |
| "grad_norm": 0.3814539611339569, |
| "learning_rate": 9.877046313165754e-06, |
| "loss": 0.8156, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.007645505347476545, |
| "grad_norm": 0.40121573209762573, |
| "learning_rate": 9.876028079991242e-06, |
| "loss": 0.735, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.007674686665596685, |
| "grad_norm": 0.3110452890396118, |
| "learning_rate": 9.875005700889782e-06, |
| "loss": 1.0318, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.007703867983716825, |
| "grad_norm": 0.36410999298095703, |
| "learning_rate": 9.87397917673067e-06, |
| "loss": 1.0398, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.007733049301836964, |
| "grad_norm": 0.3949889838695526, |
| "learning_rate": 9.872948508386727e-06, |
| "loss": 1.3404, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.007762230619957103, |
| "grad_norm": 0.3452715277671814, |
| "learning_rate": 9.871913696734293e-06, |
| "loss": 1.7137, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.007791411938077243, |
| "grad_norm": 0.49451926350593567, |
| "learning_rate": 9.870874742653238e-06, |
| "loss": 0.6079, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.007820593256197382, |
| "grad_norm": 0.353254497051239, |
| "learning_rate": 9.869831647026948e-06, |
| "loss": 1.5703, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.007849774574317523, |
| "grad_norm": 0.39976930618286133, |
| "learning_rate": 9.868784410742337e-06, |
| "loss": 0.8591, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.007878955892437662, |
| "grad_norm": 0.3353979289531708, |
| "learning_rate": 9.867733034689828e-06, |
| "loss": 0.4523, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.007908137210557801, |
| "grad_norm": 0.332116037607193, |
| "learning_rate": 9.866677519763381e-06, |
| "loss": 0.6274, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.00793731852867794, |
| "grad_norm": 0.39066842198371887, |
| "learning_rate": 9.86561786686046e-06, |
| "loss": 1.1203, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.00796649984679808, |
| "grad_norm": 0.3723788261413574, |
| "learning_rate": 9.864554076882055e-06, |
| "loss": 0.5385, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.007995681164918219, |
| "grad_norm": 0.2947128713130951, |
| "learning_rate": 9.86348615073267e-06, |
| "loss": 1.0578, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.008024862483038358, |
| "grad_norm": 0.34853091835975647, |
| "learning_rate": 9.862414089320331e-06, |
| "loss": 0.4852, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.008054043801158497, |
| "grad_norm": 0.3926672637462616, |
| "learning_rate": 9.861337893556574e-06, |
| "loss": 1.1969, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.008083225119278638, |
| "grad_norm": 0.7392916083335876, |
| "learning_rate": 9.860257564356452e-06, |
| "loss": 1.2007, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.008112406437398778, |
| "grad_norm": 0.42414942383766174, |
| "learning_rate": 9.859173102638538e-06, |
| "loss": 1.0842, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.008141587755518917, |
| "grad_norm": 0.35072061419487, |
| "learning_rate": 9.858084509324908e-06, |
| "loss": 1.1563, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.008170769073639056, |
| "grad_norm": 0.44247132539749146, |
| "learning_rate": 9.856991785341164e-06, |
| "loss": 0.7369, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.008199950391759196, |
| "grad_norm": 0.39098939299583435, |
| "learning_rate": 9.855894931616407e-06, |
| "loss": 0.6189, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.008229131709879335, |
| "grad_norm": 0.3257642090320587, |
| "learning_rate": 9.854793949083262e-06, |
| "loss": 1.1748, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.008258313027999474, |
| "grad_norm": 0.4087084233760834, |
| "learning_rate": 9.853688838677852e-06, |
| "loss": 0.7535, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.008287494346119615, |
| "grad_norm": 0.32719582319259644, |
| "learning_rate": 9.852579601339821e-06, |
| "loss": 1.213, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.008316675664239754, |
| "grad_norm": 0.3474045395851135, |
| "learning_rate": 9.851466238012317e-06, |
| "loss": 1.1946, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.008345856982359894, |
| "grad_norm": 0.2800231873989105, |
| "learning_rate": 9.850348749641993e-06, |
| "loss": 0.9802, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.008375038300480033, |
| "grad_norm": 0.39679044485092163, |
| "learning_rate": 9.849227137179015e-06, |
| "loss": 0.8683, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.008404219618600172, |
| "grad_norm": 0.359581857919693, |
| "learning_rate": 9.848101401577052e-06, |
| "loss": 0.5004, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.008433400936720311, |
| "grad_norm": 0.6436200737953186, |
| "learning_rate": 9.846971543793285e-06, |
| "loss": 1.0706, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.00846258225484045, |
| "grad_norm": 0.3004380464553833, |
| "learning_rate": 9.845837564788387e-06, |
| "loss": 0.4675, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.00849176357296059, |
| "grad_norm": 0.4008398652076721, |
| "learning_rate": 9.84469946552655e-06, |
| "loss": 0.6808, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.00852094489108073, |
| "grad_norm": 0.3354049623012543, |
| "learning_rate": 9.843557246975459e-06, |
| "loss": 0.4668, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.00855012620920087, |
| "grad_norm": 0.9540996551513672, |
| "learning_rate": 9.842410910106305e-06, |
| "loss": 0.6828, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.00857930752732101, |
| "grad_norm": 0.36251530051231384, |
| "learning_rate": 9.841260455893784e-06, |
| "loss": 1.7819, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.008608488845441149, |
| "grad_norm": 0.28926217555999756, |
| "learning_rate": 9.840105885316087e-06, |
| "loss": 0.4854, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.008637670163561288, |
| "grad_norm": 0.3726727068424225, |
| "learning_rate": 9.838947199354905e-06, |
| "loss": 0.5524, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.008666851481681427, |
| "grad_norm": 0.35486266016960144, |
| "learning_rate": 9.837784398995436e-06, |
| "loss": 1.1738, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.008696032799801566, |
| "grad_norm": 0.3808845281600952, |
| "learning_rate": 9.836617485226368e-06, |
| "loss": 1.1613, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.008725214117921707, |
| "grad_norm": 0.30906185507774353, |
| "learning_rate": 9.835446459039888e-06, |
| "loss": 0.6047, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.008754395436041847, |
| "grad_norm": 0.37905341386795044, |
| "learning_rate": 9.834271321431686e-06, |
| "loss": 0.5108, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.008783576754161986, |
| "grad_norm": 0.37152329087257385, |
| "learning_rate": 9.833092073400938e-06, |
| "loss": 1.2867, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.008812758072282125, |
| "grad_norm": 0.3889938294887543, |
| "learning_rate": 9.831908715950325e-06, |
| "loss": 0.516, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.008841939390402264, |
| "grad_norm": 0.31600430607795715, |
| "learning_rate": 9.830721250086011e-06, |
| "loss": 0.5475, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.008871120708522404, |
| "grad_norm": 0.36560872197151184, |
| "learning_rate": 9.829529676817664e-06, |
| "loss": 0.6687, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.008900302026642543, |
| "grad_norm": 0.33795249462127686, |
| "learning_rate": 9.828333997158438e-06, |
| "loss": 0.8979, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.008929483344762682, |
| "grad_norm": 0.37777137756347656, |
| "learning_rate": 9.827134212124983e-06, |
| "loss": 0.4487, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.008958664662882823, |
| "grad_norm": 0.38079383969306946, |
| "learning_rate": 9.825930322737433e-06, |
| "loss": 0.6556, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.008987845981002962, |
| "grad_norm": 0.35069790482521057, |
| "learning_rate": 9.824722330019416e-06, |
| "loss": 0.5511, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.009017027299123102, |
| "grad_norm": 0.3270156979560852, |
| "learning_rate": 9.823510234998052e-06, |
| "loss": 0.6456, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.009046208617243241, |
| "grad_norm": 0.3854398727416992, |
| "learning_rate": 9.822294038703942e-06, |
| "loss": 1.1342, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.00907538993536338, |
| "grad_norm": 0.5024343729019165, |
| "learning_rate": 9.821073742171179e-06, |
| "loss": 1.3163, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.00910457125348352, |
| "grad_norm": 0.3697148561477661, |
| "learning_rate": 9.819849346437342e-06, |
| "loss": 0.472, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.009133752571603659, |
| "grad_norm": 0.5523271560668945, |
| "learning_rate": 9.818620852543495e-06, |
| "loss": 0.8309, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.0091629338897238, |
| "grad_norm": 0.49423748254776, |
| "learning_rate": 9.817388261534185e-06, |
| "loss": 1.4531, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.009192115207843939, |
| "grad_norm": 0.40192165970802307, |
| "learning_rate": 9.816151574457444e-06, |
| "loss": 0.6268, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.009221296525964078, |
| "grad_norm": 0.374344140291214, |
| "learning_rate": 9.814910792364787e-06, |
| "loss": 0.7519, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.009250477844084217, |
| "grad_norm": 0.3867436945438385, |
| "learning_rate": 9.81366591631121e-06, |
| "loss": 0.5716, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.009279659162204357, |
| "grad_norm": 0.3703562319278717, |
| "learning_rate": 9.812416947355189e-06, |
| "loss": 0.5265, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.009308840480324496, |
| "grad_norm": 0.3638060390949249, |
| "learning_rate": 9.811163886558683e-06, |
| "loss": 0.6116, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.009338021798444635, |
| "grad_norm": 0.308459609746933, |
| "learning_rate": 9.80990673498713e-06, |
| "loss": 0.4987, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.009367203116564774, |
| "grad_norm": 0.4370526671409607, |
| "learning_rate": 9.80864549370944e-06, |
| "loss": 0.5645, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.009396384434684915, |
| "grad_norm": 0.3499382436275482, |
| "learning_rate": 9.807380163798009e-06, |
| "loss": 0.5573, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.009425565752805055, |
| "grad_norm": 0.4345581829547882, |
| "learning_rate": 9.806110746328705e-06, |
| "loss": 1.2744, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.009454747070925194, |
| "grad_norm": 0.3100895285606384, |
| "learning_rate": 9.804837242380873e-06, |
| "loss": 0.6461, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.009483928389045333, |
| "grad_norm": 0.5291584730148315, |
| "learning_rate": 9.803559653037328e-06, |
| "loss": 1.0081, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.009513109707165473, |
| "grad_norm": 0.3431501090526581, |
| "learning_rate": 9.802277979384367e-06, |
| "loss": 0.5404, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.009542291025285612, |
| "grad_norm": 0.38304755091667175, |
| "learning_rate": 9.800992222511753e-06, |
| "loss": 0.6265, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.009571472343405751, |
| "grad_norm": 0.3770524561405182, |
| "learning_rate": 9.799702383512721e-06, |
| "loss": 0.6301, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.00960065366152589, |
| "grad_norm": 0.3577704429626465, |
| "learning_rate": 9.798408463483982e-06, |
| "loss": 0.711, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.009629834979646031, |
| "grad_norm": 0.4507421851158142, |
| "learning_rate": 9.797110463525715e-06, |
| "loss": 1.2938, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.00965901629776617, |
| "grad_norm": 0.3144557774066925, |
| "learning_rate": 9.79580838474156e-06, |
| "loss": 0.5169, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.00968819761588631, |
| "grad_norm": 0.3704127371311188, |
| "learning_rate": 9.794502228238638e-06, |
| "loss": 0.7512, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.009717378934006449, |
| "grad_norm": 0.31256386637687683, |
| "learning_rate": 9.79319199512753e-06, |
| "loss": 1.0887, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.009746560252126588, |
| "grad_norm": 0.3950251042842865, |
| "learning_rate": 9.791877686522285e-06, |
| "loss": 1.4947, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.009775741570246728, |
| "grad_norm": 0.4597548544406891, |
| "learning_rate": 9.790559303540413e-06, |
| "loss": 1.3732, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.009804922888366867, |
| "grad_norm": 0.3754691481590271, |
| "learning_rate": 9.789236847302896e-06, |
| "loss": 0.4873, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.009834104206487008, |
| "grad_norm": 0.33539366722106934, |
| "learning_rate": 9.787910318934172e-06, |
| "loss": 0.5393, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.009863285524607147, |
| "grad_norm": 0.31377556920051575, |
| "learning_rate": 9.786579719562146e-06, |
| "loss": 0.4385, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.009892466842727286, |
| "grad_norm": 0.3356408178806305, |
| "learning_rate": 9.785245050318184e-06, |
| "loss": 0.5648, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.009921648160847426, |
| "grad_norm": 0.34536507725715637, |
| "learning_rate": 9.78390631233711e-06, |
| "loss": 0.4974, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.009950829478967565, |
| "grad_norm": 0.3652310371398926, |
| "learning_rate": 9.78256350675721e-06, |
| "loss": 1.1248, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.009980010797087704, |
| "grad_norm": 0.4672922194004059, |
| "learning_rate": 9.781216634720227e-06, |
| "loss": 1.1616, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.010009192115207843, |
| "grad_norm": 0.36882463097572327, |
| "learning_rate": 9.779865697371362e-06, |
| "loss": 0.5269, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.010038373433327983, |
| "grad_norm": 0.4571130573749542, |
| "learning_rate": 9.778510695859274e-06, |
| "loss": 0.8763, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.010067554751448124, |
| "grad_norm": 0.33660727739334106, |
| "learning_rate": 9.777151631336074e-06, |
| "loss": 0.5926, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.010096736069568263, |
| "grad_norm": 0.4739161431789398, |
| "learning_rate": 9.775788504957334e-06, |
| "loss": 0.7474, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.010125917387688402, |
| "grad_norm": 0.34859299659729004, |
| "learning_rate": 9.774421317882071e-06, |
| "loss": 0.6424, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.010155098705808541, |
| "grad_norm": 0.3759802579879761, |
| "learning_rate": 9.773050071272764e-06, |
| "loss": 0.6449, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.01018428002392868, |
| "grad_norm": 0.40361499786376953, |
| "learning_rate": 9.771674766295334e-06, |
| "loss": 1.6188, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.01021346134204882, |
| "grad_norm": 0.3608465790748596, |
| "learning_rate": 9.770295404119163e-06, |
| "loss": 0.5166, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.01024264266016896, |
| "grad_norm": 0.3379482626914978, |
| "learning_rate": 9.768911985917073e-06, |
| "loss": 0.5377, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.0102718239782891, |
| "grad_norm": 0.39209169149398804, |
| "learning_rate": 9.767524512865342e-06, |
| "loss": 0.8392, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.01030100529640924, |
| "grad_norm": 0.3134850859642029, |
| "learning_rate": 9.766132986143694e-06, |
| "loss": 0.6323, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.010330186614529379, |
| "grad_norm": 0.409471333026886, |
| "learning_rate": 9.764737406935295e-06, |
| "loss": 0.6607, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.010359367932649518, |
| "grad_norm": 0.40053945779800415, |
| "learning_rate": 9.763337776426762e-06, |
| "loss": 0.9634, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.010388549250769657, |
| "grad_norm": 0.5267995595932007, |
| "learning_rate": 9.761934095808156e-06, |
| "loss": 1.3353, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.010417730568889796, |
| "grad_norm": 0.3803673982620239, |
| "learning_rate": 9.760526366272978e-06, |
| "loss": 0.7678, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.010446911887009936, |
| "grad_norm": 0.3714295029640198, |
| "learning_rate": 9.759114589018178e-06, |
| "loss": 0.5707, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.010476093205130075, |
| "grad_norm": 0.34496983885765076, |
| "learning_rate": 9.75769876524414e-06, |
| "loss": 0.7947, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.010505274523250216, |
| "grad_norm": 0.32947367429733276, |
| "learning_rate": 9.756278896154693e-06, |
| "loss": 0.9273, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.010534455841370355, |
| "grad_norm": 0.399161159992218, |
| "learning_rate": 9.75485498295711e-06, |
| "loss": 1.1393, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.010563637159490494, |
| "grad_norm": 1.0698360204696655, |
| "learning_rate": 9.753427026862092e-06, |
| "loss": 1.2065, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.010592818477610634, |
| "grad_norm": 0.36978745460510254, |
| "learning_rate": 9.751995029083786e-06, |
| "loss": 1.0173, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.010621999795730773, |
| "grad_norm": 0.4568940997123718, |
| "learning_rate": 9.750558990839773e-06, |
| "loss": 0.6282, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.010651181113850912, |
| "grad_norm": 0.3994036614894867, |
| "learning_rate": 9.749118913351069e-06, |
| "loss": 0.8021, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.010680362431971051, |
| "grad_norm": 0.3851848244667053, |
| "learning_rate": 9.747674797842124e-06, |
| "loss": 0.6805, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.010709543750091192, |
| "grad_norm": 0.36664196848869324, |
| "learning_rate": 9.746226645540822e-06, |
| "loss": 0.6545, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.010738725068211332, |
| "grad_norm": 0.3806273937225342, |
| "learning_rate": 9.74477445767848e-06, |
| "loss": 0.7037, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.010767906386331471, |
| "grad_norm": 0.3356926143169403, |
| "learning_rate": 9.743318235489846e-06, |
| "loss": 0.5096, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.01079708770445161, |
| "grad_norm": 0.5911070108413696, |
| "learning_rate": 9.741857980213101e-06, |
| "loss": 0.9225, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.01082626902257175, |
| "grad_norm": 0.36224010586738586, |
| "learning_rate": 9.740393693089844e-06, |
| "loss": 0.6222, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.010855450340691889, |
| "grad_norm": 0.35221293568611145, |
| "learning_rate": 9.73892537536512e-06, |
| "loss": 0.4876, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.010884631658812028, |
| "grad_norm": 0.33988136053085327, |
| "learning_rate": 9.737453028287383e-06, |
| "loss": 0.7258, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.010913812976932167, |
| "grad_norm": 0.32662105560302734, |
| "learning_rate": 9.735976653108527e-06, |
| "loss": 0.558, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.010942994295052308, |
| "grad_norm": 0.33718347549438477, |
| "learning_rate": 9.734496251083865e-06, |
| "loss": 0.5788, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.010972175613172448, |
| "grad_norm": 0.38472211360931396, |
| "learning_rate": 9.733011823472131e-06, |
| "loss": 0.7207, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.011001356931292587, |
| "grad_norm": 0.3394148349761963, |
| "learning_rate": 9.731523371535488e-06, |
| "loss": 0.5383, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.011030538249412726, |
| "grad_norm": 0.3245641589164734, |
| "learning_rate": 9.730030896539518e-06, |
| "loss": 0.517, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.011059719567532865, |
| "grad_norm": 0.3804280459880829, |
| "learning_rate": 9.728534399753222e-06, |
| "loss": 0.5206, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.011088900885653005, |
| "grad_norm": 0.6100478172302246, |
| "learning_rate": 9.727033882449023e-06, |
| "loss": 0.7553, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.011118082203773144, |
| "grad_norm": 0.4965435266494751, |
| "learning_rate": 9.725529345902763e-06, |
| "loss": 0.574, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.011147263521893283, |
| "grad_norm": 0.31076616048812866, |
| "learning_rate": 9.724020791393698e-06, |
| "loss": 0.5868, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.011176444840013424, |
| "grad_norm": 0.3200540542602539, |
| "learning_rate": 9.722508220204501e-06, |
| "loss": 0.6465, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.011205626158133563, |
| "grad_norm": 0.44288474321365356, |
| "learning_rate": 9.720991633621268e-06, |
| "loss": 1.0584, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.011234807476253703, |
| "grad_norm": 0.33271703124046326, |
| "learning_rate": 9.719471032933496e-06, |
| "loss": 0.6802, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.011263988794373842, |
| "grad_norm": 0.3422086238861084, |
| "learning_rate": 9.717946419434108e-06, |
| "loss": 0.4469, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.011293170112493981, |
| "grad_norm": 0.46215254068374634, |
| "learning_rate": 9.716417794419428e-06, |
| "loss": 0.5714, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.01132235143061412, |
| "grad_norm": 0.32373300194740295, |
| "learning_rate": 9.714885159189198e-06, |
| "loss": 1.0706, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.01135153274873426, |
| "grad_norm": 0.3015748858451843, |
| "learning_rate": 9.713348515046566e-06, |
| "loss": 0.4271, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.0113807140668544, |
| "grad_norm": 0.6078203320503235, |
| "learning_rate": 9.711807863298092e-06, |
| "loss": 0.7983, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.01140989538497454, |
| "grad_norm": 0.44928136467933655, |
| "learning_rate": 9.710263205253743e-06, |
| "loss": 1.0152, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.011439076703094679, |
| "grad_norm": 0.3478546440601349, |
| "learning_rate": 9.708714542226887e-06, |
| "loss": 0.6882, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.011468258021214818, |
| "grad_norm": 0.6222825050354004, |
| "learning_rate": 9.707161875534304e-06, |
| "loss": 1.0782, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.011497439339334958, |
| "grad_norm": 0.36261993646621704, |
| "learning_rate": 9.705605206496176e-06, |
| "loss": 0.625, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.011526620657455097, |
| "grad_norm": 0.304210901260376, |
| "learning_rate": 9.704044536436085e-06, |
| "loss": 0.3843, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.011555801975575236, |
| "grad_norm": 0.35136616230010986, |
| "learning_rate": 9.702479866681023e-06, |
| "loss": 1.0645, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.011584983293695375, |
| "grad_norm": 0.35249078273773193, |
| "learning_rate": 9.700911198561371e-06, |
| "loss": 0.559, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.011614164611815516, |
| "grad_norm": 0.28680017590522766, |
| "learning_rate": 9.69933853341092e-06, |
| "loss": 0.4269, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.011643345929935656, |
| "grad_norm": 0.3262515962123871, |
| "learning_rate": 9.697761872566856e-06, |
| "loss": 0.5446, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.011672527248055795, |
| "grad_norm": 0.3692777156829834, |
| "learning_rate": 9.69618121736976e-06, |
| "loss": 0.5779, |
| "step": 400 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 3427, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 400, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.552468222017536e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |