{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.08170769073639056,
  "eval_steps": 500,
  "global_step": 2800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 2.9181318120139488e-05,
      "grad_norm": 0.5635480880737305,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.8507,
      "step": 1
    },
    {
      "epoch": 5.8362636240278976e-05,
      "grad_norm": 0.33079156279563904,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.1251,
      "step": 2
    },
    {
      "epoch": 8.754395436041846e-05,
      "grad_norm": 0.3454552888870239,
      "learning_rate": 1.5e-06,
      "loss": 0.6399,
      "step": 3
    },
    {
      "epoch": 0.00011672527248055795,
      "grad_norm": 0.4293176829814911,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.5054,
      "step": 4
    },
    {
      "epoch": 0.00014590659060069743,
      "grad_norm": 0.37919726967811584,
      "learning_rate": 2.5e-06,
      "loss": 0.735,
      "step": 5
    },
    {
      "epoch": 0.00017508790872083693,
      "grad_norm": 0.6950544714927673,
      "learning_rate": 3e-06,
      "loss": 1.4197,
      "step": 6
    },
    {
      "epoch": 0.0002042692268409764,
      "grad_norm": 0.38271600008010864,
      "learning_rate": 3.5e-06,
      "loss": 0.7222,
      "step": 7
    },
    {
      "epoch": 0.0002334505449611159,
      "grad_norm": 0.3510509133338928,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.6049,
      "step": 8
    },
    {
      "epoch": 0.0002626318630812554,
      "grad_norm": 0.29938340187072754,
      "learning_rate": 4.5e-06,
      "loss": 0.555,
      "step": 9
    },
    {
      "epoch": 0.00029181318120139485,
      "grad_norm": 0.38278627395629883,
      "learning_rate": 5e-06,
      "loss": 1.7384,
      "step": 10
    },
    {
      "epoch": 0.0003209944993215344,
      "grad_norm": 0.3768065273761749,
      "learning_rate": 5.500000000000001e-06,
      "loss": 0.4364,
      "step": 11
    },
    {
      "epoch": 0.00035017581744167385,
      "grad_norm": 0.3671921491622925,
      "learning_rate": 6e-06,
      "loss": 1.203,
      "step": 12
    },
    {
      "epoch": 0.00037935713556181333,
      "grad_norm": 0.3327710032463074,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 0.5083,
      "step": 13
    },
    {
      "epoch": 0.0004085384536819528,
      "grad_norm": 0.35065436363220215,
      "learning_rate": 7e-06,
      "loss": 0.5446,
      "step": 14
    },
    {
      "epoch": 0.0004377197718020923,
      "grad_norm": 0.40824198722839355,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.4587,
      "step": 15
    },
    {
      "epoch": 0.0004669010899222318,
      "grad_norm": 0.34073805809020996,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.5617,
      "step": 16
    },
    {
      "epoch": 0.0004960824080423713,
      "grad_norm": 0.3621309697628021,
      "learning_rate": 8.5e-06,
      "loss": 1.1623,
      "step": 17
    },
    {
      "epoch": 0.0005252637261625108,
      "grad_norm": 0.31340083479881287,
      "learning_rate": 9e-06,
      "loss": 0.5276,
      "step": 18
    },
    {
      "epoch": 0.0005544450442826502,
      "grad_norm": 0.36106982827186584,
      "learning_rate": 9.5e-06,
      "loss": 0.5017,
      "step": 19
    },
    {
      "epoch": 0.0005836263624027897,
      "grad_norm": 0.31271892786026,
      "learning_rate": 1e-05,
      "loss": 0.3743,
      "step": 20
    },
    {
      "epoch": 0.0006128076805229292,
      "grad_norm": 0.38480448722839355,
      "learning_rate": 9.999997874331895e-06,
      "loss": 1.1777,
      "step": 21
    },
    {
      "epoch": 0.0006419889986430688,
      "grad_norm": 0.5181815028190613,
      "learning_rate": 9.999991497329387e-06,
      "loss": 0.8806,
      "step": 22
    },
    {
      "epoch": 0.0006711703167632082,
      "grad_norm": 0.31474944949150085,
      "learning_rate": 9.9999808689979e-06,
      "loss": 0.6258,
      "step": 23
    },
    {
      "epoch": 0.0007003516348833477,
      "grad_norm": 0.4090331494808197,
      "learning_rate": 9.999965989346468e-06,
      "loss": 0.5284,
      "step": 24
    },
    {
      "epoch": 0.0007295329530034872,
      "grad_norm": 0.38257884979248047,
      "learning_rate": 9.999946858387744e-06,
      "loss": 0.8024,
      "step": 25
    },
    {
      "epoch": 0.0007587142711236267,
      "grad_norm": 0.3842026889324188,
      "learning_rate": 9.999923476137992e-06,
      "loss": 1.1511,
      "step": 26
    },
    {
      "epoch": 0.0007878955892437661,
      "grad_norm": 0.3617384433746338,
      "learning_rate": 9.999895842617097e-06,
      "loss": 0.6226,
      "step": 27
    },
    {
      "epoch": 0.0008170769073639056,
      "grad_norm": 0.36202019453048706,
      "learning_rate": 9.999863957848556e-06,
      "loss": 1.1775,
      "step": 28
    },
    {
      "epoch": 0.0008462582254840451,
      "grad_norm": 0.40996360778808594,
      "learning_rate": 9.999827821859475e-06,
      "loss": 0.9613,
      "step": 29
    },
    {
      "epoch": 0.0008754395436041846,
      "grad_norm": 0.38747870922088623,
      "learning_rate": 9.999787434680581e-06,
      "loss": 0.7427,
      "step": 30
    },
    {
      "epoch": 0.0009046208617243241,
      "grad_norm": 0.3686698377132416,
      "learning_rate": 9.999742796346215e-06,
      "loss": 1.8661,
      "step": 31
    },
    {
      "epoch": 0.0009338021798444636,
      "grad_norm": 0.4100247323513031,
      "learning_rate": 9.99969390689433e-06,
      "loss": 0.7473,
      "step": 32
    },
    {
      "epoch": 0.0009629834979646031,
      "grad_norm": 0.3075491189956665,
      "learning_rate": 9.999640766366496e-06,
      "loss": 0.352,
      "step": 33
    },
    {
      "epoch": 0.0009921648160847426,
      "grad_norm": 0.3251459002494812,
      "learning_rate": 9.999583374807895e-06,
      "loss": 1.0352,
      "step": 34
    },
    {
      "epoch": 0.001021346134204882,
      "grad_norm": 0.31156864762306213,
      "learning_rate": 9.999521732267327e-06,
      "loss": 0.4442,
      "step": 35
    },
    {
      "epoch": 0.0010505274523250215,
      "grad_norm": 0.7785539627075195,
      "learning_rate": 9.999455838797207e-06,
      "loss": 1.0465,
      "step": 36
    },
    {
      "epoch": 0.001079708770445161,
      "grad_norm": 0.3474777936935425,
      "learning_rate": 9.999385694453557e-06,
      "loss": 0.5922,
      "step": 37
    },
    {
      "epoch": 0.0011088900885653005,
      "grad_norm": 0.3285025656223297,
      "learning_rate": 9.99931129929602e-06,
      "loss": 0.6125,
      "step": 38
    },
    {
      "epoch": 0.00113807140668544,
      "grad_norm": 0.31099236011505127,
      "learning_rate": 9.999232653387854e-06,
      "loss": 0.8929,
      "step": 39
    },
    {
      "epoch": 0.0011672527248055794,
      "grad_norm": 0.9824883937835693,
      "learning_rate": 9.999149756795927e-06,
      "loss": 1.0796,
      "step": 40
    },
    {
      "epoch": 0.0011964340429257189,
      "grad_norm": 0.37850892543792725,
      "learning_rate": 9.999062609590723e-06,
      "loss": 1.1134,
      "step": 41
    },
    {
      "epoch": 0.0012256153610458584,
      "grad_norm": 0.35737380385398865,
      "learning_rate": 9.998971211846343e-06,
      "loss": 1.9588,
      "step": 42
    },
    {
      "epoch": 0.0012547966791659978,
      "grad_norm": 0.5358197689056396,
      "learning_rate": 9.998875563640495e-06,
      "loss": 0.5388,
      "step": 43
    },
    {
      "epoch": 0.0012839779972861375,
      "grad_norm": 0.3402862846851349,
      "learning_rate": 9.99877566505451e-06,
      "loss": 0.5397,
      "step": 44
    },
    {
      "epoch": 0.001313159315406277,
      "grad_norm": 0.36641523241996765,
      "learning_rate": 9.998671516173327e-06,
      "loss": 0.5417,
      "step": 45
    },
    {
      "epoch": 0.0013423406335264165,
      "grad_norm": 0.3002683222293854,
      "learning_rate": 9.9985631170855e-06,
      "loss": 0.5729,
      "step": 46
    },
    {
      "epoch": 0.001371521951646556,
      "grad_norm": 0.5055456161499023,
      "learning_rate": 9.998450467883196e-06,
      "loss": 0.5473,
      "step": 47
    },
    {
      "epoch": 0.0014007032697666954,
      "grad_norm": 0.4431580603122711,
      "learning_rate": 9.998333568662199e-06,
      "loss": 1.6627,
      "step": 48
    },
    {
      "epoch": 0.0014298845878868349,
      "grad_norm": 0.31471818685531616,
      "learning_rate": 9.998212419521905e-06,
      "loss": 0.359,
      "step": 49
    },
    {
      "epoch": 0.0014590659060069744,
      "grad_norm": 0.4176453649997711,
      "learning_rate": 9.998087020565319e-06,
      "loss": 0.9098,
      "step": 50
    },
    {
      "epoch": 0.0014882472241271138,
      "grad_norm": 0.3488178253173828,
      "learning_rate": 9.997957371899069e-06,
      "loss": 0.5606,
      "step": 51
    },
    {
      "epoch": 0.0015174285422472533,
      "grad_norm": 0.3195094168186188,
      "learning_rate": 9.997823473633388e-06,
      "loss": 0.5484,
      "step": 52
    },
    {
      "epoch": 0.0015466098603673928,
      "grad_norm": 0.3707164525985718,
      "learning_rate": 9.997685325882125e-06,
      "loss": 1.0832,
      "step": 53
    },
    {
      "epoch": 0.0015757911784875323,
      "grad_norm": 0.5281696319580078,
      "learning_rate": 9.997542928762745e-06,
      "loss": 0.4327,
      "step": 54
    },
    {
      "epoch": 0.0016049724966076717,
      "grad_norm": 0.38910332322120667,
      "learning_rate": 9.997396282396322e-06,
      "loss": 0.563,
      "step": 55
    },
    {
      "epoch": 0.0016341538147278112,
      "grad_norm": 0.5761558413505554,
      "learning_rate": 9.997245386907541e-06,
      "loss": 1.533,
      "step": 56
    },
    {
      "epoch": 0.0016633351328479507,
      "grad_norm": 0.3254339098930359,
      "learning_rate": 9.997090242424711e-06,
      "loss": 0.4337,
      "step": 57
    },
    {
      "epoch": 0.0016925164509680902,
      "grad_norm": 0.5125793814659119,
      "learning_rate": 9.996930849079741e-06,
      "loss": 0.8955,
      "step": 58
    },
    {
      "epoch": 0.0017216977690882296,
      "grad_norm": 0.4197414517402649,
      "learning_rate": 9.99676720700816e-06,
      "loss": 2.1629,
      "step": 59
    },
    {
      "epoch": 0.001750879087208369,
      "grad_norm": 0.4802038371562958,
      "learning_rate": 9.996599316349105e-06,
      "loss": 0.43,
      "step": 60
    },
    {
      "epoch": 0.0017800604053285088,
      "grad_norm": 0.30398809909820557,
      "learning_rate": 9.99642717724533e-06,
      "loss": 1.0333,
      "step": 61
    },
    {
      "epoch": 0.0018092417234486483,
      "grad_norm": 0.29787677526474,
      "learning_rate": 9.996250789843203e-06,
      "loss": 0.4196,
      "step": 62
    },
    {
      "epoch": 0.0018384230415687877,
      "grad_norm": 0.5412101745605469,
      "learning_rate": 9.996070154292691e-06,
      "loss": 0.9048,
      "step": 63
    },
    {
      "epoch": 0.0018676043596889272,
      "grad_norm": 0.6284111738204956,
      "learning_rate": 9.995885270747393e-06,
      "loss": 0.4706,
      "step": 64
    },
    {
      "epoch": 0.0018967856778090667,
      "grad_norm": 0.43187353014945984,
      "learning_rate": 9.9956961393645e-06,
      "loss": 1.2573,
      "step": 65
    },
    {
      "epoch": 0.0019259669959292062,
      "grad_norm": 0.3548312485218048,
      "learning_rate": 9.995502760304829e-06,
      "loss": 0.6652,
      "step": 66
    },
    {
      "epoch": 0.0019551483140493454,
      "grad_norm": 0.3329358398914337,
      "learning_rate": 9.995305133732805e-06,
      "loss": 0.4759,
      "step": 67
    },
    {
      "epoch": 0.001984329632169485,
      "grad_norm": 0.3695685565471649,
      "learning_rate": 9.99510325981646e-06,
      "loss": 1.257,
      "step": 68
    },
    {
      "epoch": 0.0020135109502896244,
      "grad_norm": 0.3101709485054016,
      "learning_rate": 9.994897138727446e-06,
      "loss": 0.5615,
      "step": 69
    },
    {
      "epoch": 0.002042692268409764,
      "grad_norm": 0.5336629152297974,
      "learning_rate": 9.994686770641015e-06,
      "loss": 0.8056,
      "step": 70
    },
    {
      "epoch": 0.0020718735865299038,
      "grad_norm": 0.31102678179740906,
      "learning_rate": 9.994472155736039e-06,
      "loss": 1.0285,
      "step": 71
    },
    {
      "epoch": 0.002101054904650043,
      "grad_norm": 0.31015750765800476,
      "learning_rate": 9.994253294194998e-06,
      "loss": 0.6796,
      "step": 72
    },
    {
      "epoch": 0.0021302362227701827,
      "grad_norm": 1.129408597946167,
      "learning_rate": 9.994030186203983e-06,
      "loss": 0.9488,
      "step": 73
    },
    {
      "epoch": 0.002159417540890322,
      "grad_norm": 0.3322198987007141,
      "learning_rate": 9.993802831952692e-06,
      "loss": 1.6218,
      "step": 74
    },
    {
      "epoch": 0.0021885988590104617,
      "grad_norm": 0.36219432950019836,
      "learning_rate": 9.993571231634444e-06,
      "loss": 0.4795,
      "step": 75
    },
    {
      "epoch": 0.002217780177130601,
      "grad_norm": 0.3242811858654022,
      "learning_rate": 9.993335385446155e-06,
      "loss": 0.4375,
      "step": 76
    },
    {
      "epoch": 0.0022469614952507406,
      "grad_norm": 0.5745208859443665,
      "learning_rate": 9.993095293588359e-06,
      "loss": 1.3936,
      "step": 77
    },
    {
      "epoch": 0.00227614281337088,
      "grad_norm": 0.3800385594367981,
      "learning_rate": 9.992850956265198e-06,
      "loss": 0.5863,
      "step": 78
    },
    {
      "epoch": 0.0023053241314910196,
      "grad_norm": 0.38720518350601196,
      "learning_rate": 9.992602373684426e-06,
      "loss": 0.6313,
      "step": 79
    },
    {
      "epoch": 0.002334505449611159,
      "grad_norm": 0.5431109070777893,
      "learning_rate": 9.992349546057403e-06,
      "loss": 0.7509,
      "step": 80
    },
    {
      "epoch": 0.0023636867677312985,
      "grad_norm": 0.36960089206695557,
      "learning_rate": 9.9920924735991e-06,
      "loss": 0.9468,
      "step": 81
    },
    {
      "epoch": 0.0023928680858514378,
      "grad_norm": 0.3455142080783844,
      "learning_rate": 9.991831156528095e-06,
      "loss": 0.6766,
      "step": 82
    },
    {
      "epoch": 0.0024220494039715774,
      "grad_norm": 0.288099080324173,
      "learning_rate": 9.991565595066582e-06,
      "loss": 0.3018,
      "step": 83
    },
    {
      "epoch": 0.0024512307220917167,
      "grad_norm": 0.3686278164386749,
      "learning_rate": 9.991295789440357e-06,
      "loss": 0.6696,
      "step": 84
    },
    {
      "epoch": 0.0024804120402118564,
      "grad_norm": 0.28621384501457214,
      "learning_rate": 9.991021739878828e-06,
      "loss": 0.4126,
      "step": 85
    },
    {
      "epoch": 0.0025095933583319957,
      "grad_norm": 0.33376601338386536,
      "learning_rate": 9.990743446615008e-06,
      "loss": 0.6864,
      "step": 86
    },
    {
      "epoch": 0.0025387746764521353,
      "grad_norm": 0.36860471963882446,
      "learning_rate": 9.990460909885522e-06,
      "loss": 0.5158,
      "step": 87
    },
    {
      "epoch": 0.002567955994572275,
      "grad_norm": 0.4445946216583252,
      "learning_rate": 9.9901741299306e-06,
      "loss": 1.2633,
      "step": 88
    },
    {
      "epoch": 0.0025971373126924143,
      "grad_norm": 0.29571008682250977,
      "learning_rate": 9.989883106994086e-06,
      "loss": 0.9292,
      "step": 89
    },
    {
      "epoch": 0.002626318630812554,
      "grad_norm": 0.3927260935306549,
      "learning_rate": 9.989587841323423e-06,
      "loss": 1.0559,
      "step": 90
    },
    {
      "epoch": 0.0026554999489326932,
      "grad_norm": 0.36762961745262146,
      "learning_rate": 9.98928833316967e-06,
      "loss": 0.913,
      "step": 91
    },
    {
      "epoch": 0.002684681267052833,
      "grad_norm": 0.3317353427410126,
      "learning_rate": 9.988984582787482e-06,
      "loss": 0.6172,
      "step": 92
    },
    {
      "epoch": 0.002713862585172972,
      "grad_norm": 0.465626984834671,
      "learning_rate": 9.988676590435133e-06,
      "loss": 0.7472,
      "step": 93
    },
    {
      "epoch": 0.002743043903293112,
      "grad_norm": 1.3269602060317993,
      "learning_rate": 9.9883643563745e-06,
      "loss": 0.6623,
      "step": 94
    },
    {
      "epoch": 0.002772225221413251,
      "grad_norm": 0.4574294984340668,
      "learning_rate": 9.988047880871063e-06,
      "loss": 1.3339,
      "step": 95
    },
    {
      "epoch": 0.002801406539533391,
      "grad_norm": 0.34987109899520874,
      "learning_rate": 9.98772716419391e-06,
      "loss": 0.6319,
      "step": 96
    },
    {
      "epoch": 0.00283058785765353,
      "grad_norm": 0.402045875787735,
      "learning_rate": 9.98740220661574e-06,
      "loss": 0.6483,
      "step": 97
    },
    {
      "epoch": 0.0028597691757736698,
      "grad_norm": 0.29255297780036926,
      "learning_rate": 9.987073008412847e-06,
      "loss": 0.5413,
      "step": 98
    },
    {
      "epoch": 0.002888950493893809,
      "grad_norm": 0.3598422110080719,
      "learning_rate": 9.986739569865143e-06,
      "loss": 0.595,
      "step": 99
    },
    {
      "epoch": 0.0029181318120139487,
      "grad_norm": 0.38826480507850647,
      "learning_rate": 9.986401891256139e-06,
      "loss": 0.6546,
      "step": 100
    },
    {
      "epoch": 0.002947313130134088,
      "grad_norm": 0.42309150099754333,
      "learning_rate": 9.98605997287295e-06,
      "loss": 1.0045,
      "step": 101
    },
    {
      "epoch": 0.0029764944482542277,
      "grad_norm": 0.4501197338104248,
      "learning_rate": 9.9857138150063e-06,
      "loss": 0.8104,
      "step": 102
    },
    {
      "epoch": 0.003005675766374367,
      "grad_norm": 0.3553255796432495,
      "learning_rate": 9.985363417950515e-06,
      "loss": 0.6416,
      "step": 103
    },
    {
      "epoch": 0.0030348570844945066,
      "grad_norm": 0.30859172344207764,
      "learning_rate": 9.985008782003524e-06,
      "loss": 0.5201,
      "step": 104
    },
    {
      "epoch": 0.0030640384026146463,
      "grad_norm": 0.3903139531612396,
      "learning_rate": 9.984649907466868e-06,
      "loss": 0.8591,
      "step": 105
    },
    {
      "epoch": 0.0030932197207347856,
      "grad_norm": 0.3806699514389038,
      "learning_rate": 9.98428679464568e-06,
      "loss": 0.6575,
      "step": 106
    },
    {
      "epoch": 0.0031224010388549253,
      "grad_norm": 0.7828800082206726,
      "learning_rate": 9.983919443848706e-06,
      "loss": 0.63,
      "step": 107
    },
    {
      "epoch": 0.0031515823569750645,
      "grad_norm": 0.32453709840774536,
      "learning_rate": 9.98354785538829e-06,
      "loss": 0.489,
      "step": 108
    },
    {
      "epoch": 0.003180763675095204,
      "grad_norm": 0.4170776307582855,
      "learning_rate": 9.983172029580387e-06,
      "loss": 1.1076,
      "step": 109
    },
    {
      "epoch": 0.0032099449932153435,
      "grad_norm": 0.3523752689361572,
      "learning_rate": 9.982791966744545e-06,
      "loss": 0.6405,
      "step": 110
    },
    {
      "epoch": 0.003239126311335483,
      "grad_norm": 0.32706937193870544,
      "learning_rate": 9.98240766720392e-06,
      "loss": 1.0937,
      "step": 111
    },
    {
      "epoch": 0.0032683076294556224,
      "grad_norm": 0.3406533896923065,
      "learning_rate": 9.982019131285268e-06,
      "loss": 1.3389,
      "step": 112
    },
    {
      "epoch": 0.003297488947575762,
      "grad_norm": 0.376828134059906,
      "learning_rate": 9.98162635931895e-06,
      "loss": 0.636,
      "step": 113
    },
    {
      "epoch": 0.0033266702656959014,
      "grad_norm": 0.35637038946151733,
      "learning_rate": 9.981229351638926e-06,
      "loss": 0.4319,
      "step": 114
    },
    {
      "epoch": 0.003355851583816041,
      "grad_norm": 0.5207456350326538,
      "learning_rate": 9.980828108582759e-06,
      "loss": 0.6011,
      "step": 115
    },
    {
      "epoch": 0.0033850329019361803,
      "grad_norm": 0.464778333902359,
      "learning_rate": 9.980422630491614e-06,
      "loss": 0.7913,
      "step": 116
    },
    {
      "epoch": 0.00341421422005632,
      "grad_norm": 0.3464951515197754,
      "learning_rate": 9.980012917710254e-06,
      "loss": 0.5774,
      "step": 117
    },
    {
      "epoch": 0.0034433955381764593,
      "grad_norm": 0.36604249477386475,
      "learning_rate": 9.979598970587046e-06,
      "loss": 0.7515,
      "step": 118
    },
    {
      "epoch": 0.003472576856296599,
      "grad_norm": 0.35948342084884644,
      "learning_rate": 9.979180789473955e-06,
      "loss": 0.4906,
      "step": 119
    },
    {
      "epoch": 0.003501758174416738,
      "grad_norm": 0.3790506422519684,
      "learning_rate": 9.978758374726544e-06,
      "loss": 0.7257,
      "step": 120
    },
    {
      "epoch": 0.003530939492536878,
      "grad_norm": 0.36446383595466614,
      "learning_rate": 9.978331726703984e-06,
      "loss": 0.6115,
      "step": 121
    },
    {
      "epoch": 0.0035601208106570176,
      "grad_norm": 0.4974438548088074,
      "learning_rate": 9.977900845769037e-06,
      "loss": 0.6018,
      "step": 122
    },
    {
      "epoch": 0.003589302128777157,
      "grad_norm": 0.4783862233161926,
      "learning_rate": 9.977465732288065e-06,
      "loss": 0.7256,
      "step": 123
    },
    {
      "epoch": 0.0036184834468972965,
      "grad_norm": 0.35444504022598267,
      "learning_rate": 9.977026386631032e-06,
      "loss": 0.5428,
      "step": 124
    },
    {
      "epoch": 0.003647664765017436,
      "grad_norm": 0.44525983929634094,
      "learning_rate": 9.9765828091715e-06,
      "loss": 0.7336,
      "step": 125
    },
    {
      "epoch": 0.0036768460831375755,
      "grad_norm": 0.32913491129875183,
      "learning_rate": 9.97613500028663e-06,
      "loss": 0.6213,
      "step": 126
    },
    {
      "epoch": 0.0037060274012577147,
      "grad_norm": 0.3486779034137726,
      "learning_rate": 9.975682960357176e-06,
      "loss": 1.1807,
      "step": 127
    },
    {
      "epoch": 0.0037352087193778544,
      "grad_norm": 0.2928440570831299,
      "learning_rate": 9.975226689767494e-06,
      "loss": 0.5046,
      "step": 128
    },
    {
      "epoch": 0.0037643900374979937,
      "grad_norm": 0.3638307750225067,
      "learning_rate": 9.974766188905535e-06,
      "loss": 1.2703,
      "step": 129
    },
    {
      "epoch": 0.0037935713556181334,
      "grad_norm": 0.478950560092926,
      "learning_rate": 9.97430145816285e-06,
      "loss": 1.1034,
      "step": 130
    },
    {
      "epoch": 0.0038227526737382726,
      "grad_norm": 0.5774679183959961,
      "learning_rate": 9.973832497934583e-06,
      "loss": 0.5785,
      "step": 131
    },
    {
      "epoch": 0.0038519339918584123,
      "grad_norm": 0.3301682770252228,
      "learning_rate": 9.973359308619476e-06,
      "loss": 0.6012,
      "step": 132
    },
    {
      "epoch": 0.0038811153099785516,
      "grad_norm": 0.4451266825199127,
      "learning_rate": 9.972881890619865e-06,
      "loss": 0.6879,
      "step": 133
    },
    {
      "epoch": 0.003910296628098691,
      "grad_norm": 0.6361525654792786,
      "learning_rate": 9.972400244341685e-06,
      "loss": 0.8636,
      "step": 134
    },
    {
      "epoch": 0.003939477946218831,
      "grad_norm": 0.3009544909000397,
      "learning_rate": 9.971914370194462e-06,
      "loss": 0.5197,
      "step": 135
    },
    {
      "epoch": 0.00396865926433897,
      "grad_norm": 0.36018285155296326,
      "learning_rate": 9.97142426859132e-06,
      "loss": 0.6695,
      "step": 136
    },
    {
      "epoch": 0.0039978405824591095,
      "grad_norm": 0.4810916781425476,
      "learning_rate": 9.970929939948978e-06,
      "loss": 0.9842,
      "step": 137
    },
    {
      "epoch": 0.004027021900579249,
      "grad_norm": 0.3791263699531555,
      "learning_rate": 9.970431384687741e-06,
      "loss": 0.6019,
      "step": 138
    },
    {
      "epoch": 0.004056203218699389,
      "grad_norm": 0.35318222641944885,
      "learning_rate": 9.969928603231523e-06,
      "loss": 1.1406,
      "step": 139
    },
    {
      "epoch": 0.004085384536819528,
      "grad_norm": 0.42550453543663025,
      "learning_rate": 9.969421596007817e-06,
      "loss": 0.7477,
      "step": 140
    },
    {
      "epoch": 0.004114565854939667,
      "grad_norm": 0.4141107201576233,
      "learning_rate": 9.968910363447715e-06,
      "loss": 1.1222,
      "step": 141
    },
    {
      "epoch": 0.0041437471730598075,
      "grad_norm": 0.2917640507221222,
      "learning_rate": 9.968394905985905e-06,
      "loss": 0.3913,
      "step": 142
    },
    {
      "epoch": 0.004172928491179947,
      "grad_norm": 0.38363099098205566,
      "learning_rate": 9.967875224060658e-06,
      "loss": 1.0972,
      "step": 143
    },
    {
      "epoch": 0.004202109809300086,
      "grad_norm": 0.5850480794906616,
      "learning_rate": 9.967351318113847e-06,
      "loss": 0.7765,
      "step": 144
    },
    {
      "epoch": 0.004231291127420225,
      "grad_norm": 0.31228914856910706,
      "learning_rate": 9.96682318859093e-06,
      "loss": 0.4522,
      "step": 145
    },
    {
      "epoch": 0.004260472445540365,
      "grad_norm": 0.45077505707740784,
      "learning_rate": 9.96629083594096e-06,
      "loss": 0.7652,
      "step": 146
    },
    {
      "epoch": 0.004289653763660505,
      "grad_norm": 0.32041504979133606,
      "learning_rate": 9.965754260616576e-06,
      "loss": 0.5541,
      "step": 147
    },
    {
      "epoch": 0.004318835081780644,
      "grad_norm": 0.34780099987983704,
      "learning_rate": 9.965213463074013e-06,
      "loss": 0.8193,
      "step": 148
    },
    {
      "epoch": 0.004348016399900783,
      "grad_norm": 0.36673223972320557,
      "learning_rate": 9.964668443773094e-06,
      "loss": 1.1096,
      "step": 149
    },
    {
      "epoch": 0.004377197718020923,
      "grad_norm": 0.4030401110649109,
      "learning_rate": 9.964119203177228e-06,
      "loss": 0.7801,
      "step": 150
    },
    {
      "epoch": 0.0044063790361410626,
      "grad_norm": 0.5267347693443298,
      "learning_rate": 9.963565741753418e-06,
      "loss": 0.6537,
      "step": 151
    },
    {
      "epoch": 0.004435560354261202,
      "grad_norm": 0.6500905156135559,
      "learning_rate": 9.963008059972255e-06,
      "loss": 0.6598,
      "step": 152
    },
    {
      "epoch": 0.004464741672381341,
      "grad_norm": 0.44937339425086975,
      "learning_rate": 9.962446158307914e-06,
      "loss": 0.9024,
      "step": 153
    },
    {
      "epoch": 0.004493922990501481,
      "grad_norm": 0.47618094086647034,
      "learning_rate": 9.961880037238168e-06,
      "loss": 0.6525,
      "step": 154
    },
    {
      "epoch": 0.0045231043086216205,
      "grad_norm": 0.811037003993988,
      "learning_rate": 9.961309697244366e-06,
      "loss": 1.2114,
      "step": 155
    },
    {
      "epoch": 0.00455228562674176,
      "grad_norm": 0.30810266733169556,
      "learning_rate": 9.960735138811451e-06,
      "loss": 0.6901,
      "step": 156
    },
    {
      "epoch": 0.0045814669448619,
      "grad_norm": 0.4555955231189728,
      "learning_rate": 9.960156362427949e-06,
      "loss": 0.822,
      "step": 157
    },
    {
      "epoch": 0.004610648262982039,
      "grad_norm": 0.3224095106124878,
      "learning_rate": 9.959573368585979e-06,
      "loss": 0.5978,
      "step": 158
    },
    {
      "epoch": 0.004639829581102178,
      "grad_norm": 0.37293335795402527,
      "learning_rate": 9.95898615778124e-06,
      "loss": 0.6142,
      "step": 159
    },
    {
      "epoch": 0.004669010899222318,
      "grad_norm": 0.44633975625038147,
      "learning_rate": 9.958394730513014e-06,
      "loss": 0.5834,
      "step": 160
    },
    {
      "epoch": 0.004698192217342458,
      "grad_norm": 0.3741106688976288,
      "learning_rate": 9.957799087284177e-06,
      "loss": 0.7021,
      "step": 161
    },
    {
      "epoch": 0.004727373535462597,
      "grad_norm": 0.35581302642822266,
      "learning_rate": 9.957199228601183e-06,
      "loss": 0.7703,
      "step": 162
    },
    {
      "epoch": 0.004756554853582736,
      "grad_norm": 0.3660070300102234,
      "learning_rate": 9.956595154974073e-06,
      "loss": 1.0133,
      "step": 163
    },
    {
      "epoch": 0.0047857361717028755,
      "grad_norm": 0.35619139671325684,
      "learning_rate": 9.955986866916472e-06,
      "loss": 0.5734,
      "step": 164
    },
    {
      "epoch": 0.004814917489823016,
      "grad_norm": 0.3273680806159973,
      "learning_rate": 9.955374364945585e-06,
      "loss": 1.1949,
      "step": 165
    },
    {
      "epoch": 0.004844098807943155,
      "grad_norm": 0.3657272458076477,
      "learning_rate": 9.954757649582202e-06,
      "loss": 0.4649,
      "step": 166
    },
    {
      "epoch": 0.004873280126063294,
      "grad_norm": 0.3416500985622406,
      "learning_rate": 9.9541367213507e-06,
      "loss": 0.817,
      "step": 167
    },
    {
      "epoch": 0.004902461444183433,
      "grad_norm": 0.3144441545009613,
      "learning_rate": 9.95351158077903e-06,
      "loss": 0.6406,
      "step": 168
    },
    {
      "epoch": 0.0049316427623035735,
      "grad_norm": 0.4005574584007263,
      "learning_rate": 9.952882228398731e-06,
      "loss": 0.7143,
      "step": 169
    },
    {
      "epoch": 0.004960824080423713,
      "grad_norm": 0.45650023221969604,
      "learning_rate": 9.952248664744919e-06,
      "loss": 0.565,
      "step": 170
    },
    {
      "epoch": 0.004990005398543852,
      "grad_norm": 0.3257487416267395,
      "learning_rate": 9.951610890356291e-06,
      "loss": 0.5586,
      "step": 171
    },
    {
      "epoch": 0.005019186716663991,
      "grad_norm": 0.3424636721611023,
      "learning_rate": 9.95096890577513e-06,
      "loss": 1.0405,
      "step": 172
    },
    {
      "epoch": 0.005048368034784131,
      "grad_norm": 0.45603424310684204,
      "learning_rate": 9.950322711547292e-06,
      "loss": 0.7305,
      "step": 173
    },
    {
      "epoch": 0.005077549352904271,
      "grad_norm": 0.4074293375015259,
      "learning_rate": 9.949672308222214e-06,
      "loss": 0.5625,
      "step": 174
    },
    {
      "epoch": 0.00510673067102441,
      "grad_norm": 0.40585842728614807,
      "learning_rate": 9.949017696352914e-06,
      "loss": 0.8139,
      "step": 175
    },
    {
      "epoch": 0.00513591198914455,
      "grad_norm": 0.3102453351020813,
      "learning_rate": 9.948358876495985e-06,
      "loss": 0.7125,
      "step": 176
    },
    {
      "epoch": 0.005165093307264689,
      "grad_norm": 0.3370908796787262,
      "learning_rate": 9.947695849211603e-06,
      "loss": 0.5844,
      "step": 177
    },
    {
      "epoch": 0.005194274625384829,
      "grad_norm": 0.3051895499229431,
      "learning_rate": 9.947028615063515e-06,
      "loss": 0.5174,
      "step": 178
    },
    {
      "epoch": 0.005223455943504968,
      "grad_norm": 0.3698843717575073,
      "learning_rate": 9.946357174619052e-06,
      "loss": 0.4539,
      "step": 179
    },
    {
      "epoch": 0.005252637261625108,
      "grad_norm": 0.29240456223487854,
      "learning_rate": 9.945681528449116e-06,
      "loss": 0.5571,
      "step": 180
    },
    {
      "epoch": 0.005281818579745247,
      "grad_norm": 0.3231453001499176,
      "learning_rate": 9.945001677128185e-06,
      "loss": 1.1014,
      "step": 181
    },
    {
      "epoch": 0.0053109998978653865,
      "grad_norm": 0.3621669113636017,
      "learning_rate": 9.944317621234318e-06,
      "loss": 0.6259,
      "step": 182
    },
    {
      "epoch": 0.005340181215985526,
      "grad_norm": 0.3420839011669159,
      "learning_rate": 9.943629361349143e-06,
      "loss": 0.6329,
      "step": 183
    },
    {
      "epoch": 0.005369362534105666,
      "grad_norm": 0.45090600848197937,
      "learning_rate": 9.942936898057866e-06,
      "loss": 1.0192,
      "step": 184
    },
    {
      "epoch": 0.005398543852225805,
      "grad_norm": 0.32308369874954224,
      "learning_rate": 9.942240231949263e-06,
      "loss": 0.5465,
      "step": 185
    },
    {
      "epoch": 0.005427725170345944,
      "grad_norm": 0.34121567010879517,
      "learning_rate": 9.94153936361569e-06,
      "loss": 0.6829,
      "step": 186
    },
    {
      "epoch": 0.005456906488466084,
      "grad_norm": 0.3925750255584717,
      "learning_rate": 9.940834293653071e-06,
      "loss": 1.0301,
      "step": 187
    },
    {
      "epoch": 0.005486087806586224,
      "grad_norm": 0.40466272830963135,
      "learning_rate": 9.940125022660903e-06,
      "loss": 0.8774,
      "step": 188
    },
    {
      "epoch": 0.005515269124706363,
      "grad_norm": 0.3522239625453949,
      "learning_rate": 9.939411551242258e-06,
      "loss": 0.7484,
      "step": 189
    },
    {
      "epoch": 0.005544450442826502,
      "grad_norm": 0.28628867864608765,
      "learning_rate": 9.938693880003775e-06,
      "loss": 0.7138,
      "step": 190
    },
    {
      "epoch": 0.0055736317609466415,
      "grad_norm": 0.3318323791027069,
      "learning_rate": 9.937972009555667e-06,
      "loss": 0.514,
      "step": 191
    },
    {
      "epoch": 0.005602813079066782,
      "grad_norm": 0.34792056679725647,
      "learning_rate": 9.937245940511719e-06,
      "loss": 0.5826,
      "step": 192
    },
    {
      "epoch": 0.005631994397186921,
      "grad_norm": 0.3346732556819916,
      "learning_rate": 9.93651567348928e-06,
      "loss": 0.4349,
      "step": 193
    },
    {
      "epoch": 0.00566117571530706,
      "grad_norm": 0.37206366658210754,
      "learning_rate": 9.935781209109274e-06,
      "loss": 1.2471,
      "step": 194
    },
    {
      "epoch": 0.0056903570334272,
      "grad_norm": 0.4244491755962372,
      "learning_rate": 9.935042547996194e-06,
      "loss": 0.9581,
      "step": 195
    },
    {
      "epoch": 0.0057195383515473396,
      "grad_norm": 0.35628461837768555,
      "learning_rate": 9.934299690778096e-06,
      "loss": 0.4569,
      "step": 196
    },
    {
      "epoch": 0.005748719669667479,
      "grad_norm": 0.32731014490127563,
      "learning_rate": 9.933552638086607e-06,
      "loss": 0.5726,
      "step": 197
    },
    {
      "epoch": 0.005777900987787618,
      "grad_norm": 0.3174387514591217,
      "learning_rate": 9.932801390556926e-06,
      "loss": 1.3027,
      "step": 198
    },
    {
      "epoch": 0.005807082305907758,
      "grad_norm": 0.3262479305267334,
      "learning_rate": 9.932045948827809e-06,
      "loss": 1.1214,
      "step": 199
    },
    {
      "epoch": 0.0058362636240278975,
      "grad_norm": 0.3247474431991577,
      "learning_rate": 9.931286313541586e-06,
      "loss": 0.4761,
      "step": 200
    },
    {
      "epoch": 0.005865444942148037,
      "grad_norm": 0.38119345903396606,
      "learning_rate": 9.930522485344149e-06,
      "loss": 0.882,
      "step": 201
    },
    {
      "epoch": 0.005894626260268176,
      "grad_norm": 0.29767370223999023,
      "learning_rate": 9.929754464884958e-06,
      "loss": 0.6177,
      "step": 202
    },
    {
      "epoch": 0.005923807578388316,
      "grad_norm": 0.4254716634750366,
      "learning_rate": 9.928982252817032e-06,
      "loss": 0.6291,
      "step": 203
    },
    {
      "epoch": 0.005952988896508455,
      "grad_norm": 0.39530616998672485,
      "learning_rate": 9.928205849796963e-06,
      "loss": 0.9351,
      "step": 204
    },
    {
      "epoch": 0.005982170214628595,
      "grad_norm": 0.6373737454414368,
      "learning_rate": 9.927425256484894e-06,
      "loss": 0.6,
      "step": 205
    },
    {
      "epoch": 0.006011351532748734,
      "grad_norm": 0.37412020564079285,
      "learning_rate": 9.926640473544545e-06,
      "loss": 0.6374,
      "step": 206
    },
    {
      "epoch": 0.006040532850868874,
      "grad_norm": 0.33179807662963867,
      "learning_rate": 9.925851501643186e-06,
      "loss": 0.715,
      "step": 207
    },
    {
      "epoch": 0.006069714168989013,
      "grad_norm": 0.3322802186012268,
      "learning_rate": 9.925058341451659e-06,
      "loss": 0.6499,
      "step": 208
    },
    {
      "epoch": 0.0060988954871091525,
      "grad_norm": 0.34102705121040344,
      "learning_rate": 9.924260993644357e-06,
      "loss": 0.4996,
      "step": 209
    },
    {
      "epoch": 0.006128076805229293,
      "grad_norm": 0.3628920614719391,
      "learning_rate": 9.92345945889924e-06,
      "loss": 1.0272,
      "step": 210
    },
    {
      "epoch": 0.006157258123349432,
      "grad_norm": 0.3197779357433319,
      "learning_rate": 9.92265373789783e-06,
      "loss": 0.4657,
      "step": 211
    },
    {
      "epoch": 0.006186439441469571,
      "grad_norm": 0.3385450839996338,
      "learning_rate": 9.9218438313252e-06,
      "loss": 0.5419,
      "step": 212
    },
    {
      "epoch": 0.00621562075958971,
      "grad_norm": 0.6793949604034424,
      "learning_rate": 9.921029739869993e-06,
      "loss": 1.5859,
      "step": 213
    },
    {
      "epoch": 0.0062448020777098505,
      "grad_norm": 0.3696063756942749,
      "learning_rate": 9.920211464224398e-06,
      "loss": 0.5275,
      "step": 214
    },
    {
      "epoch": 0.00627398339582999,
      "grad_norm": 0.3089805841445923,
      "learning_rate": 9.919389005084173e-06,
      "loss": 1.0725,
      "step": 215
    },
    {
      "epoch": 0.006303164713950129,
      "grad_norm": 0.35823842883110046,
      "learning_rate": 9.918562363148625e-06,
      "loss": 0.58,
      "step": 216
    },
    {
      "epoch": 0.006332346032070268,
      "grad_norm": 0.35605373978614807,
      "learning_rate": 9.917731539120623e-06,
      "loss": 0.8045,
      "step": 217
    },
    {
      "epoch": 0.006361527350190408,
      "grad_norm": 0.9635096192359924,
      "learning_rate": 9.916896533706587e-06,
      "loss": 0.6127,
      "step": 218
    },
    {
      "epoch": 0.006390708668310548,
      "grad_norm": 0.44269976019859314,
      "learning_rate": 9.916057347616496e-06,
      "loss": 1.1253,
      "step": 219
    },
    {
      "epoch": 0.006419889986430687,
      "grad_norm": 0.35200875997543335,
      "learning_rate": 9.915213981563882e-06,
      "loss": 0.7173,
      "step": 220
    },
    {
      "epoch": 0.006449071304550826,
      "grad_norm": 0.3846692442893982,
      "learning_rate": 9.914366436265834e-06,
      "loss": 0.625,
      "step": 221
    },
    {
      "epoch": 0.006478252622670966,
      "grad_norm": 1.3158338069915771,
      "learning_rate": 9.913514712442987e-06,
      "loss": 0.4931,
      "step": 222
    },
    {
      "epoch": 0.006507433940791106,
      "grad_norm": 0.3677213191986084,
      "learning_rate": 9.912658810819537e-06,
      "loss": 0.6044,
      "step": 223
    },
    {
      "epoch": 0.006536615258911245,
      "grad_norm": 0.37323689460754395,
      "learning_rate": 9.911798732123231e-06,
      "loss": 0.8516,
      "step": 224
    },
    {
      "epoch": 0.006565796577031384,
      "grad_norm": 0.41386744379997253,
      "learning_rate": 9.910934477085363e-06,
      "loss": 0.633,
      "step": 225
    },
    {
      "epoch": 0.006594977895151524,
      "grad_norm": 0.3256453573703766,
      "learning_rate": 9.91006604644078e-06,
      "loss": 0.5754,
      "step": 226
    },
    {
      "epoch": 0.0066241592132716635,
      "grad_norm": 0.42724573612213135,
      "learning_rate": 9.909193440927882e-06,
      "loss": 0.9702,
      "step": 227
    },
    {
      "epoch": 0.006653340531391803,
      "grad_norm": 0.30440038442611694,
      "learning_rate": 9.908316661288617e-06,
      "loss": 0.6389,
      "step": 228
    },
    {
      "epoch": 0.006682521849511943,
      "grad_norm": 0.3871992528438568,
      "learning_rate": 9.907435708268483e-06,
      "loss": 1.7041,
      "step": 229
    },
    {
      "epoch": 0.006711703167632082,
      "grad_norm": 0.3268488049507141,
      "learning_rate": 9.906550582616521e-06,
      "loss": 1.0783,
      "step": 230
    },
    {
      "epoch": 0.006740884485752221,
      "grad_norm": 0.3559863865375519,
      "learning_rate": 9.90566128508533e-06,
      "loss": 1.0627,
      "step": 231
    },
    {
      "epoch": 0.006770065803872361,
      "grad_norm": 0.3503190577030182,
      "learning_rate": 9.904767816431043e-06,
      "loss": 1.1182,
      "step": 232
    },
    {
      "epoch": 0.006799247121992501,
      "grad_norm": 0.41551750898361206,
      "learning_rate": 9.903870177413354e-06,
      "loss": 1.0803,
      "step": 233
    },
    {
      "epoch": 0.00682842844011264,
      "grad_norm": 0.32648736238479614,
      "learning_rate": 9.902968368795496e-06,
      "loss": 0.4153,
      "step": 234
    },
    {
      "epoch": 0.006857609758232779,
      "grad_norm": 0.5350513458251953,
      "learning_rate": 9.902062391344245e-06,
      "loss": 1.5776,
      "step": 235
    },
    {
      "epoch": 0.0068867910763529185,
      "grad_norm": 0.4450839161872864,
      "learning_rate": 9.901152245829922e-06,
      "loss": 0.4149,
      "step": 236
    },
    {
      "epoch": 0.006915972394473059,
      "grad_norm": 0.31524381041526794,
      "learning_rate": 9.900237933026397e-06,
      "loss": 0.5053,
      "step": 237
    },
    {
      "epoch": 0.006945153712593198,
      "grad_norm": 0.36861109733581543,
      "learning_rate": 9.899319453711081e-06,
      "loss": 1.1878,
      "step": 238
    },
    {
      "epoch": 0.006974335030713337,
      "grad_norm": 0.43096283078193665,
      "learning_rate": 9.898396808664924e-06,
      "loss": 0.827,
      "step": 239
    },
    {
      "epoch": 0.007003516348833476,
      "grad_norm": 0.4168417453765869,
      "learning_rate": 9.89746999867242e-06,
      "loss": 1.0594,
      "step": 240
    },
    {
      "epoch": 0.0070326976669536165,
      "grad_norm": 0.7598991394042969,
      "learning_rate": 9.89653902452161e-06,
      "loss": 1.001,
      "step": 241
    },
    {
      "epoch": 0.007061878985073756,
      "grad_norm": 0.4895627796649933,
      "learning_rate": 9.895603887004068e-06,
      "loss": 1.327,
      "step": 242
    },
    {
      "epoch": 0.007091060303193895,
      "grad_norm": 0.5062536597251892,
      "learning_rate": 9.894664586914911e-06,
      "loss": 0.9793,
      "step": 243
    },
    {
      "epoch": 0.007120241621314035,
      "grad_norm": 0.3142737150192261,
      "learning_rate": 9.893721125052794e-06,
      "loss": 0.6362,
      "step": 244
    },
    {
      "epoch": 0.0071494229394341744,
      "grad_norm": 0.316954642534256,
      "learning_rate": 9.892773502219913e-06,
      "loss": 0.3909,
      "step": 245
    },
    {
      "epoch": 0.007178604257554314,
      "grad_norm": 0.3828858733177185,
      "learning_rate": 9.891821719222e-06,
      "loss": 0.529,
      "step": 246
    },
    {
      "epoch": 0.007207785575674453,
      "grad_norm": 0.35143372416496277,
      "learning_rate": 9.890865776868324e-06,
      "loss": 1.0562,
      "step": 247
    },
    {
      "epoch": 0.007236966893794593,
      "grad_norm": 0.4023430347442627,
      "learning_rate": 9.889905675971694e-06,
      "loss": 0.729,
      "step": 248
    },
    {
      "epoch": 0.007266148211914732,
      "grad_norm": 0.36702919006347656,
      "learning_rate": 9.888941417348453e-06,
      "loss": 1.1581,
      "step": 249
    },
    {
      "epoch": 0.007295329530034872,
      "grad_norm": 0.36125248670578003,
      "learning_rate": 9.887973001818473e-06,
      "loss": 0.5188,
      "step": 250
    },
    {
      "epoch": 0.007324510848155011,
      "grad_norm": 0.32154127955436707,
      "learning_rate": 9.887000430205173e-06,
      "loss": 0.5811,
      "step": 251
    },
    {
      "epoch": 0.007353692166275151,
      "grad_norm": 0.3822373151779175,
      "learning_rate": 9.886023703335493e-06,
      "loss": 0.69,
      "step": 252
    },
    {
      "epoch": 0.00738287348439529,
      "grad_norm": 0.4168561100959778,
      "learning_rate": 9.885042822039915e-06,
      "loss": 0.9765,
      "step": 253
    },
    {
      "epoch": 0.0074120548025154295,
      "grad_norm": 0.4219783842563629,
      "learning_rate": 9.884057787152451e-06,
      "loss": 1.3698,
      "step": 254
    },
    {
      "epoch": 0.007441236120635569,
      "grad_norm": 0.32248085737228394,
      "learning_rate": 9.88306859951064e-06,
      "loss": 0.4901,
      "step": 255
    },
    {
      "epoch": 0.007470417438755709,
      "grad_norm": 0.4942518174648285,
      "learning_rate": 9.88207525995556e-06,
      "loss": 0.7452,
      "step": 256
    },
    {
      "epoch": 0.007499598756875848,
      "grad_norm": 0.32152867317199707,
      "learning_rate": 9.881077769331811e-06,
      "loss": 0.6269,
      "step": 257
    },
    {
      "epoch": 0.007528780074995987,
      "grad_norm": 0.4080241322517395,
      "learning_rate": 9.88007612848753e-06,
      "loss": 0.9697,
      "step": 258
    },
    {
      "epoch": 0.007557961393116127,
      "grad_norm": 0.40665292739868164,
      "learning_rate": 9.879070338274379e-06,
      "loss": 0.6378,
      "step": 259
    },
    {
      "epoch": 0.007587142711236267,
      "grad_norm": 0.43658050894737244,
      "learning_rate": 9.878060399547547e-06,
      "loss": 1.5812,
      "step": 260
    },
    {
      "epoch": 0.007616324029356406,
      "grad_norm": 0.3814539611339569,
      "learning_rate": 9.877046313165754e-06,
      "loss": 0.8156,
      "step": 261
    },
    {
      "epoch": 0.007645505347476545,
      "grad_norm": 0.40121573209762573,
      "learning_rate": 9.876028079991242e-06,
      "loss": 0.735,
      "step": 262
    },
    {
      "epoch": 0.007674686665596685,
      "grad_norm": 0.3110452890396118,
      "learning_rate": 9.875005700889782e-06,
      "loss": 1.0318,
      "step": 263
    },
    {
      "epoch": 0.007703867983716825,
      "grad_norm": 0.36410999298095703,
      "learning_rate": 9.87397917673067e-06,
      "loss": 1.0398,
      "step": 264
    },
    {
      "epoch": 0.007733049301836964,
      "grad_norm": 0.3949889838695526,
      "learning_rate": 9.872948508386727e-06,
      "loss": 1.3404,
      "step": 265
    },
    {
      "epoch": 0.007762230619957103,
      "grad_norm": 0.3452715277671814,
      "learning_rate": 9.871913696734293e-06,
      "loss": 1.7137,
      "step": 266
    },
    {
      "epoch": 0.007791411938077243,
      "grad_norm": 0.49451926350593567,
      "learning_rate": 9.870874742653238e-06,
      "loss": 0.6079,
      "step": 267
    },
    {
      "epoch": 0.007820593256197382,
      "grad_norm": 0.353254497051239,
      "learning_rate": 9.869831647026948e-06,
      "loss": 1.5703,
      "step": 268
    },
    {
      "epoch": 0.007849774574317523,
      "grad_norm": 0.39976930618286133,
      "learning_rate": 9.868784410742337e-06,
      "loss": 0.8591,
      "step": 269
    },
    {
      "epoch": 0.007878955892437662,
      "grad_norm": 0.3353979289531708,
      "learning_rate": 9.867733034689828e-06,
      "loss": 0.4523,
      "step": 270
    },
    {
      "epoch": 0.007908137210557801,
      "grad_norm": 0.332116037607193,
      "learning_rate": 9.866677519763381e-06,
      "loss": 0.6274,
      "step": 271
    },
    {
      "epoch": 0.00793731852867794,
      "grad_norm": 0.39066842198371887,
      "learning_rate": 9.86561786686046e-06,
      "loss": 1.1203,
      "step": 272
    },
    {
      "epoch": 0.00796649984679808,
      "grad_norm": 0.3723788261413574,
      "learning_rate": 9.864554076882055e-06,
      "loss": 0.5385,
      "step": 273
    },
    {
      "epoch": 0.007995681164918219,
      "grad_norm": 0.2947128713130951,
      "learning_rate": 9.86348615073267e-06,
      "loss": 1.0578,
      "step": 274
    },
    {
      "epoch": 0.008024862483038358,
      "grad_norm": 0.34853091835975647,
      "learning_rate": 9.862414089320331e-06,
      "loss": 0.4852,
      "step": 275
    },
    {
      "epoch": 0.008054043801158497,
      "grad_norm": 0.3926672637462616,
      "learning_rate": 9.861337893556574e-06,
      "loss": 1.1969,
      "step": 276
    },
    {
      "epoch": 0.008083225119278638,
      "grad_norm": 0.7392916083335876,
      "learning_rate": 9.860257564356452e-06,
      "loss": 1.2007,
      "step": 277
    },
    {
      "epoch": 0.008112406437398778,
      "grad_norm": 0.42414942383766174,
      "learning_rate": 9.859173102638538e-06,
      "loss": 1.0842,
      "step": 278
    },
    {
      "epoch": 0.008141587755518917,
      "grad_norm": 0.35072061419487,
      "learning_rate": 9.858084509324908e-06,
      "loss": 1.1563,
      "step": 279
    },
    {
      "epoch": 0.008170769073639056,
      "grad_norm": 0.44247132539749146,
      "learning_rate": 9.856991785341164e-06,
      "loss": 0.7369,
      "step": 280
    },
    {
      "epoch": 0.008199950391759196,
      "grad_norm": 0.39098939299583435,
      "learning_rate": 9.855894931616407e-06,
      "loss": 0.6189,
      "step": 281
    },
    {
      "epoch": 0.008229131709879335,
      "grad_norm": 0.3257642090320587,
      "learning_rate": 9.854793949083262e-06,
      "loss": 1.1748,
      "step": 282
    },
    {
      "epoch": 0.008258313027999474,
      "grad_norm": 0.4087084233760834,
      "learning_rate": 9.853688838677852e-06,
      "loss": 0.7535,
      "step": 283
    },
    {
      "epoch": 0.008287494346119615,
      "grad_norm": 0.32719582319259644,
      "learning_rate": 9.852579601339821e-06,
      "loss": 1.213,
      "step": 284
    },
    {
      "epoch": 0.008316675664239754,
      "grad_norm": 0.3474045395851135,
      "learning_rate": 9.851466238012317e-06,
      "loss": 1.1946,
      "step": 285
    },
    {
      "epoch": 0.008345856982359894,
      "grad_norm": 0.2800231873989105,
      "learning_rate": 9.850348749641993e-06,
      "loss": 0.9802,
      "step": 286
    },
    {
      "epoch": 0.008375038300480033,
      "grad_norm": 0.39679044485092163,
      "learning_rate": 9.849227137179015e-06,
      "loss": 0.8683,
      "step": 287
    },
    {
      "epoch": 0.008404219618600172,
      "grad_norm": 0.359581857919693,
      "learning_rate": 9.848101401577052e-06,
      "loss": 0.5004,
      "step": 288
    },
    {
      "epoch": 0.008433400936720311,
      "grad_norm": 0.6436200737953186,
      "learning_rate": 9.846971543793285e-06,
      "loss": 1.0706,
      "step": 289
    },
    {
      "epoch": 0.00846258225484045,
      "grad_norm": 0.3004380464553833,
      "learning_rate": 9.845837564788387e-06,
      "loss": 0.4675,
      "step": 290
    },
    {
      "epoch": 0.00849176357296059,
      "grad_norm": 0.4008398652076721,
      "learning_rate": 9.84469946552655e-06,
      "loss": 0.6808,
      "step": 291
    },
    {
      "epoch": 0.00852094489108073,
      "grad_norm": 0.3354049623012543,
      "learning_rate": 9.843557246975459e-06,
      "loss": 0.4668,
      "step": 292
    },
    {
      "epoch": 0.00855012620920087,
      "grad_norm": 0.9540996551513672,
      "learning_rate": 9.842410910106305e-06,
      "loss": 0.6828,
      "step": 293
    },
    {
      "epoch": 0.00857930752732101,
      "grad_norm": 0.36251530051231384,
      "learning_rate": 9.841260455893784e-06,
      "loss": 1.7819,
      "step": 294
    },
    {
      "epoch": 0.008608488845441149,
      "grad_norm": 0.28926217555999756,
      "learning_rate": 9.840105885316087e-06,
      "loss": 0.4854,
      "step": 295
    },
    {
      "epoch": 0.008637670163561288,
      "grad_norm": 0.3726727068424225,
      "learning_rate": 9.838947199354905e-06,
      "loss": 0.5524,
      "step": 296
    },
    {
      "epoch": 0.008666851481681427,
      "grad_norm": 0.35486266016960144,
      "learning_rate": 9.837784398995436e-06,
      "loss": 1.1738,
      "step": 297
    },
    {
      "epoch": 0.008696032799801566,
      "grad_norm": 0.3808845281600952,
      "learning_rate": 9.836617485226368e-06,
      "loss": 1.1613,
      "step": 298
    },
    {
      "epoch": 0.008725214117921707,
      "grad_norm": 0.30906185507774353,
      "learning_rate": 9.835446459039888e-06,
      "loss": 0.6047,
      "step": 299
    },
    {
      "epoch": 0.008754395436041847,
      "grad_norm": 0.37905341386795044,
      "learning_rate": 9.834271321431686e-06,
      "loss": 0.5108,
      "step": 300
    },
    {
      "epoch": 0.008783576754161986,
      "grad_norm": 0.37152329087257385,
      "learning_rate": 9.833092073400938e-06,
      "loss": 1.2867,
      "step": 301
    },
    {
      "epoch": 0.008812758072282125,
      "grad_norm": 0.3889938294887543,
      "learning_rate": 9.831908715950325e-06,
      "loss": 0.516,
      "step": 302
    },
    {
      "epoch": 0.008841939390402264,
      "grad_norm": 0.31600430607795715,
      "learning_rate": 9.830721250086011e-06,
      "loss": 0.5475,
      "step": 303
    },
    {
      "epoch": 0.008871120708522404,
      "grad_norm": 0.36560872197151184,
      "learning_rate": 9.829529676817664e-06,
      "loss": 0.6687,
      "step": 304
    },
    {
      "epoch": 0.008900302026642543,
      "grad_norm": 0.33795249462127686,
      "learning_rate": 9.828333997158438e-06,
      "loss": 0.8979,
      "step": 305
    },
    {
      "epoch": 0.008929483344762682,
      "grad_norm": 0.37777137756347656,
      "learning_rate": 9.827134212124983e-06,
      "loss": 0.4487,
      "step": 306
    },
    {
      "epoch": 0.008958664662882823,
      "grad_norm": 0.38079383969306946,
      "learning_rate": 9.825930322737433e-06,
      "loss": 0.6556,
      "step": 307
    },
    {
      "epoch": 0.008987845981002962,
      "grad_norm": 0.35069790482521057,
      "learning_rate": 9.824722330019416e-06,
      "loss": 0.5511,
      "step": 308
    },
    {
      "epoch": 0.009017027299123102,
      "grad_norm": 0.3270156979560852,
      "learning_rate": 9.823510234998052e-06,
      "loss": 0.6456,
      "step": 309
    },
    {
      "epoch": 0.009046208617243241,
      "grad_norm": 0.3854398727416992,
      "learning_rate": 9.822294038703942e-06,
      "loss": 1.1342,
      "step": 310
    },
    {
      "epoch": 0.00907538993536338,
      "grad_norm": 0.5024343729019165,
      "learning_rate": 9.821073742171179e-06,
      "loss": 1.3163,
      "step": 311
    },
    {
      "epoch": 0.00910457125348352,
      "grad_norm": 0.3697148561477661,
      "learning_rate": 9.819849346437342e-06,
      "loss": 0.472,
      "step": 312
    },
    {
      "epoch": 0.009133752571603659,
      "grad_norm": 0.5523271560668945,
      "learning_rate": 9.818620852543495e-06,
      "loss": 0.8309,
      "step": 313
    },
    {
      "epoch": 0.0091629338897238,
      "grad_norm": 0.49423748254776,
      "learning_rate": 9.817388261534185e-06,
      "loss": 1.4531,
      "step": 314
    },
    {
      "epoch": 0.009192115207843939,
      "grad_norm": 0.40192165970802307,
      "learning_rate": 9.816151574457444e-06,
      "loss": 0.6268,
      "step": 315
    },
    {
      "epoch": 0.009221296525964078,
      "grad_norm": 0.374344140291214,
      "learning_rate": 9.814910792364787e-06,
      "loss": 0.7519,
      "step": 316
    },
    {
      "epoch": 0.009250477844084217,
      "grad_norm": 0.3867436945438385,
      "learning_rate": 9.81366591631121e-06,
      "loss": 0.5716,
      "step": 317
    },
    {
      "epoch": 0.009279659162204357,
      "grad_norm": 0.3703562319278717,
      "learning_rate": 9.812416947355189e-06,
      "loss": 0.5265,
      "step": 318
    },
    {
      "epoch": 0.009308840480324496,
      "grad_norm": 0.3638060390949249,
      "learning_rate": 9.811163886558683e-06,
      "loss": 0.6116,
      "step": 319
    },
    {
      "epoch": 0.009338021798444635,
      "grad_norm": 0.308459609746933,
      "learning_rate": 9.80990673498713e-06,
      "loss": 0.4987,
      "step": 320
    },
    {
      "epoch": 0.009367203116564774,
      "grad_norm": 0.4370526671409607,
      "learning_rate": 9.80864549370944e-06,
      "loss": 0.5645,
      "step": 321
    },
    {
      "epoch": 0.009396384434684915,
      "grad_norm": 0.3499382436275482,
      "learning_rate": 9.807380163798009e-06,
      "loss": 0.5573,
      "step": 322
    },
    {
      "epoch": 0.009425565752805055,
      "grad_norm": 0.4345581829547882,
      "learning_rate": 9.806110746328705e-06,
      "loss": 1.2744,
      "step": 323
    },
    {
      "epoch": 0.009454747070925194,
      "grad_norm": 0.3100895285606384,
      "learning_rate": 9.804837242380873e-06,
      "loss": 0.6461,
      "step": 324
    },
    {
      "epoch": 0.009483928389045333,
      "grad_norm": 0.5291584730148315,
      "learning_rate": 9.803559653037328e-06,
      "loss": 1.0081,
      "step": 325
    },
    {
      "epoch": 0.009513109707165473,
      "grad_norm": 0.3431501090526581,
      "learning_rate": 9.802277979384367e-06,
      "loss": 0.5404,
      "step": 326
    },
    {
      "epoch": 0.009542291025285612,
      "grad_norm": 0.38304755091667175,
      "learning_rate": 9.800992222511753e-06,
      "loss": 0.6265,
      "step": 327
    },
    {
      "epoch": 0.009571472343405751,
      "grad_norm": 0.3770524561405182,
      "learning_rate": 9.799702383512721e-06,
      "loss": 0.6301,
      "step": 328
    },
    {
      "epoch": 0.00960065366152589,
      "grad_norm": 0.3577704429626465,
      "learning_rate": 9.798408463483982e-06,
      "loss": 0.711,
      "step": 329
    },
    {
      "epoch": 0.009629834979646031,
      "grad_norm": 0.4507421851158142,
      "learning_rate": 9.797110463525715e-06,
      "loss": 1.2938,
      "step": 330
    },
    {
      "epoch": 0.00965901629776617,
      "grad_norm": 0.3144557774066925,
      "learning_rate": 9.79580838474156e-06,
      "loss": 0.5169,
      "step": 331
    },
    {
      "epoch": 0.00968819761588631,
      "grad_norm": 0.3704127371311188,
      "learning_rate": 9.794502228238638e-06,
      "loss": 0.7512,
      "step": 332
    },
    {
      "epoch": 0.009717378934006449,
      "grad_norm": 0.31256386637687683,
      "learning_rate": 9.79319199512753e-06,
      "loss": 1.0887,
      "step": 333
    },
    {
      "epoch": 0.009746560252126588,
      "grad_norm": 0.3950251042842865,
      "learning_rate": 9.791877686522285e-06,
      "loss": 1.4947,
      "step": 334
    },
    {
      "epoch": 0.009775741570246728,
      "grad_norm": 0.4597548544406891,
      "learning_rate": 9.790559303540413e-06,
      "loss": 1.3732,
      "step": 335
    },
    {
      "epoch": 0.009804922888366867,
      "grad_norm": 0.3754691481590271,
      "learning_rate": 9.789236847302896e-06,
      "loss": 0.4873,
      "step": 336
    },
    {
      "epoch": 0.009834104206487008,
      "grad_norm": 0.33539366722106934,
      "learning_rate": 9.787910318934172e-06,
      "loss": 0.5393,
      "step": 337
    },
    {
      "epoch": 0.009863285524607147,
      "grad_norm": 0.31377556920051575,
      "learning_rate": 9.786579719562146e-06,
      "loss": 0.4385,
      "step": 338
    },
    {
      "epoch": 0.009892466842727286,
      "grad_norm": 0.3356408178806305,
      "learning_rate": 9.785245050318184e-06,
      "loss": 0.5648,
      "step": 339
    },
    {
      "epoch": 0.009921648160847426,
      "grad_norm": 0.34536507725715637,
      "learning_rate": 9.78390631233711e-06,
      "loss": 0.4974,
      "step": 340
    },
    {
      "epoch": 0.009950829478967565,
      "grad_norm": 0.3652310371398926,
      "learning_rate": 9.78256350675721e-06,
      "loss": 1.1248,
      "step": 341
    },
    {
      "epoch": 0.009980010797087704,
      "grad_norm": 0.4672922194004059,
      "learning_rate": 9.781216634720227e-06,
      "loss": 1.1616,
      "step": 342
    },
    {
      "epoch": 0.010009192115207843,
      "grad_norm": 0.36882463097572327,
      "learning_rate": 9.779865697371362e-06,
      "loss": 0.5269,
      "step": 343
    },
    {
      "epoch": 0.010038373433327983,
      "grad_norm": 0.4571130573749542,
      "learning_rate": 9.778510695859274e-06,
      "loss": 0.8763,
      "step": 344
    },
    {
      "epoch": 0.010067554751448124,
      "grad_norm": 0.33660727739334106,
      "learning_rate": 9.777151631336074e-06,
      "loss": 0.5926,
      "step": 345
    },
    {
      "epoch": 0.010096736069568263,
      "grad_norm": 0.4739161431789398,
      "learning_rate": 9.775788504957334e-06,
      "loss": 0.7474,
      "step": 346
    },
    {
      "epoch": 0.010125917387688402,
      "grad_norm": 0.34859299659729004,
      "learning_rate": 9.774421317882071e-06,
      "loss": 0.6424,
      "step": 347
    },
    {
      "epoch": 0.010155098705808541,
      "grad_norm": 0.3759802579879761,
      "learning_rate": 9.773050071272764e-06,
      "loss": 0.6449,
      "step": 348
    },
    {
      "epoch": 0.01018428002392868,
      "grad_norm": 0.40361499786376953,
      "learning_rate": 9.771674766295334e-06,
      "loss": 1.6188,
      "step": 349
    },
    {
      "epoch": 0.01021346134204882,
      "grad_norm": 0.3608465790748596,
      "learning_rate": 9.770295404119163e-06,
      "loss": 0.5166,
      "step": 350
    },
    {
      "epoch": 0.01024264266016896,
      "grad_norm": 0.3379482626914978,
      "learning_rate": 9.768911985917073e-06,
      "loss": 0.5377,
      "step": 351
    },
    {
      "epoch": 0.0102718239782891,
      "grad_norm": 0.39209169149398804,
      "learning_rate": 9.767524512865342e-06,
      "loss": 0.8392,
      "step": 352
    },
    {
      "epoch": 0.01030100529640924,
      "grad_norm": 0.3134850859642029,
      "learning_rate": 9.766132986143694e-06,
      "loss": 0.6323,
      "step": 353
    },
    {
      "epoch": 0.010330186614529379,
      "grad_norm": 0.409471333026886,
      "learning_rate": 9.764737406935295e-06,
      "loss": 0.6607,
      "step": 354
    },
    {
      "epoch": 0.010359367932649518,
      "grad_norm": 0.40053945779800415,
      "learning_rate": 9.763337776426762e-06,
      "loss": 0.9634,
      "step": 355
    },
    {
      "epoch": 0.010388549250769657,
      "grad_norm": 0.5267995595932007,
      "learning_rate": 9.761934095808156e-06,
      "loss": 1.3353,
      "step": 356
    },
    {
      "epoch": 0.010417730568889796,
      "grad_norm": 0.3803673982620239,
      "learning_rate": 9.760526366272978e-06,
      "loss": 0.7678,
      "step": 357
    },
    {
      "epoch": 0.010446911887009936,
      "grad_norm": 0.3714295029640198,
      "learning_rate": 9.759114589018178e-06,
      "loss": 0.5707,
      "step": 358
    },
    {
      "epoch": 0.010476093205130075,
      "grad_norm": 0.34496983885765076,
      "learning_rate": 9.75769876524414e-06,
      "loss": 0.7947,
      "step": 359
    },
    {
      "epoch": 0.010505274523250216,
      "grad_norm": 0.32947367429733276,
      "learning_rate": 9.756278896154693e-06,
      "loss": 0.9273,
      "step": 360
    },
    {
      "epoch": 0.010534455841370355,
      "grad_norm": 0.399161159992218,
      "learning_rate": 9.75485498295711e-06,
      "loss": 1.1393,
      "step": 361
    },
    {
      "epoch": 0.010563637159490494,
      "grad_norm": 1.0698360204696655,
      "learning_rate": 9.753427026862092e-06,
      "loss": 1.2065,
      "step": 362
    },
    {
      "epoch": 0.010592818477610634,
      "grad_norm": 0.36978745460510254,
      "learning_rate": 9.751995029083786e-06,
      "loss": 1.0173,
      "step": 363
    },
    {
      "epoch": 0.010621999795730773,
      "grad_norm": 0.4568940997123718,
      "learning_rate": 9.750558990839773e-06,
      "loss": 0.6282,
      "step": 364
    },
    {
      "epoch": 0.010651181113850912,
      "grad_norm": 0.3994036614894867,
      "learning_rate": 9.749118913351069e-06,
      "loss": 0.8021,
      "step": 365
    },
    {
      "epoch": 0.010680362431971051,
      "grad_norm": 0.3851848244667053,
      "learning_rate": 9.747674797842124e-06,
      "loss": 0.6805,
| "step": 366 |
| }, |
| { |
| "epoch": 0.010709543750091192, |
| "grad_norm": 0.36664196848869324, |
| "learning_rate": 9.746226645540822e-06, |
| "loss": 0.6545, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.010738725068211332, |
| "grad_norm": 0.3806273937225342, |
| "learning_rate": 9.74477445767848e-06, |
| "loss": 0.7037, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.010767906386331471, |
| "grad_norm": 0.3356926143169403, |
| "learning_rate": 9.743318235489846e-06, |
| "loss": 0.5096, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.01079708770445161, |
| "grad_norm": 0.5911070108413696, |
| "learning_rate": 9.741857980213101e-06, |
| "loss": 0.9225, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.01082626902257175, |
| "grad_norm": 0.36224010586738586, |
| "learning_rate": 9.740393693089844e-06, |
| "loss": 0.6222, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.010855450340691889, |
| "grad_norm": 0.35221293568611145, |
| "learning_rate": 9.73892537536512e-06, |
| "loss": 0.4876, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.010884631658812028, |
| "grad_norm": 0.33988136053085327, |
| "learning_rate": 9.737453028287383e-06, |
| "loss": 0.7258, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.010913812976932167, |
| "grad_norm": 0.32662105560302734, |
| "learning_rate": 9.735976653108527e-06, |
| "loss": 0.558, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.010942994295052308, |
| "grad_norm": 0.33718347549438477, |
| "learning_rate": 9.734496251083865e-06, |
| "loss": 0.5788, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.010972175613172448, |
| "grad_norm": 0.38472211360931396, |
| "learning_rate": 9.733011823472131e-06, |
| "loss": 0.7207, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.011001356931292587, |
| "grad_norm": 0.3394148349761963, |
| "learning_rate": 9.731523371535488e-06, |
| "loss": 0.5383, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.011030538249412726, |
| "grad_norm": 0.3245641589164734, |
| "learning_rate": 9.730030896539518e-06, |
| "loss": 0.517, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.011059719567532865, |
| "grad_norm": 0.3804280459880829, |
| "learning_rate": 9.728534399753222e-06, |
| "loss": 0.5206, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.011088900885653005, |
| "grad_norm": 0.6100478172302246, |
| "learning_rate": 9.727033882449023e-06, |
| "loss": 0.7553, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.011118082203773144, |
| "grad_norm": 0.4965435266494751, |
| "learning_rate": 9.725529345902763e-06, |
| "loss": 0.574, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.011147263521893283, |
| "grad_norm": 0.31076616048812866, |
| "learning_rate": 9.724020791393698e-06, |
| "loss": 0.5868, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.011176444840013424, |
| "grad_norm": 0.3200540542602539, |
| "learning_rate": 9.722508220204501e-06, |
| "loss": 0.6465, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.011205626158133563, |
| "grad_norm": 0.44288474321365356, |
| "learning_rate": 9.720991633621268e-06, |
| "loss": 1.0584, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.011234807476253703, |
| "grad_norm": 0.33271703124046326, |
| "learning_rate": 9.719471032933496e-06, |
| "loss": 0.6802, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.011263988794373842, |
| "grad_norm": 0.3422086238861084, |
| "learning_rate": 9.717946419434108e-06, |
| "loss": 0.4469, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.011293170112493981, |
| "grad_norm": 0.46215254068374634, |
| "learning_rate": 9.716417794419428e-06, |
| "loss": 0.5714, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.01132235143061412, |
| "grad_norm": 0.32373300194740295, |
| "learning_rate": 9.714885159189198e-06, |
| "loss": 1.0706, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.01135153274873426, |
| "grad_norm": 0.3015748858451843, |
| "learning_rate": 9.713348515046566e-06, |
| "loss": 0.4271, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.0113807140668544, |
| "grad_norm": 0.6078203320503235, |
| "learning_rate": 9.711807863298092e-06, |
| "loss": 0.7983, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.01140989538497454, |
| "grad_norm": 0.44928136467933655, |
| "learning_rate": 9.710263205253743e-06, |
| "loss": 1.0152, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.011439076703094679, |
| "grad_norm": 0.3478546440601349, |
| "learning_rate": 9.708714542226887e-06, |
| "loss": 0.6882, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.011468258021214818, |
| "grad_norm": 0.6222825050354004, |
| "learning_rate": 9.707161875534304e-06, |
| "loss": 1.0782, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.011497439339334958, |
| "grad_norm": 0.36261993646621704, |
| "learning_rate": 9.705605206496176e-06, |
| "loss": 0.625, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.011526620657455097, |
| "grad_norm": 0.304210901260376, |
| "learning_rate": 9.704044536436085e-06, |
| "loss": 0.3843, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.011555801975575236, |
| "grad_norm": 0.35136616230010986, |
| "learning_rate": 9.702479866681023e-06, |
| "loss": 1.0645, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.011584983293695375, |
| "grad_norm": 0.35249078273773193, |
| "learning_rate": 9.700911198561371e-06, |
| "loss": 0.559, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.011614164611815516, |
| "grad_norm": 0.28680017590522766, |
| "learning_rate": 9.69933853341092e-06, |
| "loss": 0.4269, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.011643345929935656, |
| "grad_norm": 0.3262515962123871, |
| "learning_rate": 9.697761872566856e-06, |
| "loss": 0.5446, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.011672527248055795, |
| "grad_norm": 0.3692777156829834, |
| "learning_rate": 9.69618121736976e-06, |
| "loss": 0.5779, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.011701708566175934, |
| "grad_norm": 0.2981965243816376, |
| "learning_rate": 9.694596569163612e-06, |
| "loss": 0.4808, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.011730889884296073, |
| "grad_norm": 0.35515785217285156, |
| "learning_rate": 9.693007929295785e-06, |
| "loss": 0.7093, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.011760071202416213, |
| "grad_norm": 0.41098204255104065, |
| "learning_rate": 9.691415299117052e-06, |
| "loss": 0.9059, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.011789252520536352, |
| "grad_norm": 0.5656394958496094, |
| "learning_rate": 9.689818679981571e-06, |
| "loss": 1.5732, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.011818433838656493, |
| "grad_norm": 0.2940177023410797, |
| "learning_rate": 9.688218073246894e-06, |
| "loss": 0.4407, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.011847615156776632, |
| "grad_norm": 0.36513781547546387, |
| "learning_rate": 9.686613480273965e-06, |
| "loss": 0.5507, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.011876796474896771, |
| "grad_norm": 0.3105609118938446, |
| "learning_rate": 9.68500490242712e-06, |
| "loss": 0.5836, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.01190597779301691, |
| "grad_norm": 0.38569653034210205, |
| "learning_rate": 9.683392341074077e-06, |
| "loss": 0.8858, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.01193515911113705, |
| "grad_norm": 0.3530753254890442, |
| "learning_rate": 9.681775797585943e-06, |
| "loss": 0.4907, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.01196434042925719, |
| "grad_norm": 0.39760276675224304, |
| "learning_rate": 9.680155273337216e-06, |
| "loss": 0.7455, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.011993521747377328, |
| "grad_norm": 0.39642858505249023, |
| "learning_rate": 9.678530769705772e-06, |
| "loss": 0.6158, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.012022703065497468, |
| "grad_norm": 0.7576881647109985, |
| "learning_rate": 9.676902288072874e-06, |
| "loss": 0.5047, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.012051884383617609, |
| "grad_norm": 0.33636897802352905, |
| "learning_rate": 9.675269829823164e-06, |
| "loss": 0.6555, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.012081065701737748, |
| "grad_norm": 0.5256043076515198, |
| "learning_rate": 9.673633396344672e-06, |
| "loss": 0.8975, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.012110247019857887, |
| "grad_norm": 0.3417125344276428, |
| "learning_rate": 9.671992989028801e-06, |
| "loss": 0.9778, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.012139428337978026, |
| "grad_norm": 0.4242870509624481, |
| "learning_rate": 9.670348609270337e-06, |
| "loss": 0.871, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.012168609656098166, |
| "grad_norm": 0.4195970296859741, |
| "learning_rate": 9.668700258467441e-06, |
| "loss": 0.8084, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.012197790974218305, |
| "grad_norm": 0.3127722442150116, |
| "learning_rate": 9.667047938021653e-06, |
| "loss": 0.9288, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.012226972292338444, |
| "grad_norm": 0.35216641426086426, |
| "learning_rate": 9.665391649337886e-06, |
| "loss": 0.7857, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.012256153610458585, |
| "grad_norm": 0.3610037565231323, |
| "learning_rate": 9.663731393824427e-06, |
| "loss": 0.6889, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.012285334928578725, |
| "grad_norm": 0.3709530234336853, |
| "learning_rate": 9.66206717289294e-06, |
| "loss": 0.6927, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.012314516246698864, |
| "grad_norm": 0.5423220992088318, |
| "learning_rate": 9.660398987958455e-06, |
| "loss": 0.6138, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.012343697564819003, |
| "grad_norm": 0.3675086796283722, |
| "learning_rate": 9.658726840439376e-06, |
| "loss": 0.6768, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.012372878882939142, |
| "grad_norm": 0.3232286274433136, |
| "learning_rate": 9.657050731757475e-06, |
| "loss": 1.0044, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.012402060201059282, |
| "grad_norm": 0.3915942907333374, |
| "learning_rate": 9.655370663337892e-06, |
| "loss": 1.7632, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.01243124151917942, |
| "grad_norm": 0.3224577009677887, |
| "learning_rate": 9.653686636609133e-06, |
| "loss": 0.5068, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.01246042283729956, |
| "grad_norm": 0.33855387568473816, |
| "learning_rate": 9.651998653003076e-06, |
| "loss": 0.3169, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.012489604155419701, |
| "grad_norm": 0.3714600205421448, |
| "learning_rate": 9.650306713954952e-06, |
| "loss": 1.3035, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.01251878547353984, |
| "grad_norm": 0.44817546010017395, |
| "learning_rate": 9.648610820903362e-06, |
| "loss": 0.6215, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.01254796679165998, |
| "grad_norm": 0.320726603269577, |
| "learning_rate": 9.646910975290273e-06, |
| "loss": 0.5211, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.012577148109780119, |
| "grad_norm": 0.5175723433494568, |
| "learning_rate": 9.645207178561003e-06, |
| "loss": 1.2928, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.012606329427900258, |
| "grad_norm": 0.4380705952644348, |
| "learning_rate": 9.643499432164237e-06, |
| "loss": 0.9595, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.012635510746020397, |
| "grad_norm": 0.6213127374649048, |
| "learning_rate": 9.641787737552017e-06, |
| "loss": 0.5555, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.012664692064140537, |
| "grad_norm": 0.32056528329849243, |
| "learning_rate": 9.640072096179739e-06, |
| "loss": 1.2118, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.012693873382260676, |
| "grad_norm": 0.3587930500507355, |
| "learning_rate": 9.638352509506155e-06, |
| "loss": 0.6076, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.012723054700380817, |
| "grad_norm": 0.3511458933353424, |
| "learning_rate": 9.636628978993376e-06, |
| "loss": 0.6903, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.012752236018500956, |
| "grad_norm": 0.31581902503967285, |
| "learning_rate": 9.634901506106863e-06, |
| "loss": 0.4332, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.012781417336621095, |
| "grad_norm": 0.3287270963191986, |
| "learning_rate": 9.633170092315428e-06, |
| "loss": 0.5847, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.012810598654741235, |
| "grad_norm": 0.45387589931488037, |
| "learning_rate": 9.631434739091238e-06, |
| "loss": 0.8871, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.012839779972861374, |
| "grad_norm": 0.3239494264125824, |
| "learning_rate": 9.629695447909806e-06, |
| "loss": 0.6483, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.012868961290981513, |
| "grad_norm": 0.42458924651145935, |
| "learning_rate": 9.627952220249991e-06, |
| "loss": 1.4521, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.012898142609101652, |
| "grad_norm": 0.3900853395462036, |
| "learning_rate": 9.626205057594008e-06, |
| "loss": 0.8409, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.012927323927221793, |
| "grad_norm": 0.35969194769859314, |
| "learning_rate": 9.624453961427412e-06, |
| "loss": 0.4441, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.012956505245341933, |
| "grad_norm": 0.335550457239151, |
| "learning_rate": 9.622698933239097e-06, |
| "loss": 1.6135, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.012985686563462072, |
| "grad_norm": 0.34555917978286743, |
| "learning_rate": 9.62093997452131e-06, |
| "loss": 1.499, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.013014867881582211, |
| "grad_norm": 0.32284262776374817, |
| "learning_rate": 9.619177086769635e-06, |
| "loss": 0.703, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.01304404919970235, |
| "grad_norm": 0.40233612060546875, |
| "learning_rate": 9.617410271482999e-06, |
| "loss": 1.0828, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.01307323051782249, |
| "grad_norm": 0.31386712193489075, |
| "learning_rate": 9.615639530163664e-06, |
| "loss": 0.9996, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.013102411835942629, |
| "grad_norm": 0.3874974250793457, |
| "learning_rate": 9.613864864317237e-06, |
| "loss": 0.6509, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.013131593154062768, |
| "grad_norm": 0.31677138805389404, |
| "learning_rate": 9.612086275452657e-06, |
| "loss": 0.535, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.01316077447218291, |
| "grad_norm": 0.3604523539543152, |
| "learning_rate": 9.610303765082198e-06, |
| "loss": 0.6228, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.013189955790303048, |
| "grad_norm": 0.3448488414287567, |
| "learning_rate": 9.608517334721474e-06, |
| "loss": 0.5788, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.013219137108423188, |
| "grad_norm": 0.4131591022014618, |
| "learning_rate": 9.606726985889425e-06, |
| "loss": 0.4029, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.013248318426543327, |
| "grad_norm": 0.38808080554008484, |
| "learning_rate": 9.604932720108326e-06, |
| "loss": 0.6852, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.013277499744663466, |
| "grad_norm": 0.35633230209350586, |
| "learning_rate": 9.603134538903783e-06, |
| "loss": 0.7387, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.013306681062783605, |
| "grad_norm": 0.3555307984352112, |
| "learning_rate": 9.601332443804732e-06, |
| "loss": 1.1693, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.013335862380903745, |
| "grad_norm": 0.35522693395614624, |
| "learning_rate": 9.599526436343434e-06, |
| "loss": 0.5799, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.013365043699023886, |
| "grad_norm": 0.38670095801353455, |
| "learning_rate": 9.59771651805548e-06, |
| "loss": 1.0986, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.013394225017144025, |
| "grad_norm": 0.3780023455619812, |
| "learning_rate": 9.595902690479779e-06, |
| "loss": 1.1947, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.013423406335264164, |
| "grad_norm": 0.3738841116428375, |
| "learning_rate": 9.594084955158576e-06, |
| "loss": 0.6544, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.013452587653384303, |
| "grad_norm": 0.4092550575733185, |
| "learning_rate": 9.592263313637427e-06, |
| "loss": 0.6366, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.013481768971504443, |
| "grad_norm": 0.38792550563812256, |
| "learning_rate": 9.590437767465215e-06, |
| "loss": 0.6108, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.013510950289624582, |
| "grad_norm": 0.38298285007476807, |
| "learning_rate": 9.588608318194144e-06, |
| "loss": 0.6127, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.013540131607744721, |
| "grad_norm": 0.3888932168483734, |
| "learning_rate": 9.586774967379732e-06, |
| "loss": 0.6993, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.01356931292586486, |
| "grad_norm": 0.44153282046318054, |
| "learning_rate": 9.58493771658082e-06, |
| "loss": 1.287, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.013598494243985002, |
| "grad_norm": 0.30452239513397217, |
| "learning_rate": 9.58309656735956e-06, |
| "loss": 0.3715, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.01362767556210514, |
| "grad_norm": 0.3128340244293213, |
| "learning_rate": 9.58125152128142e-06, |
| "loss": 0.4638, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.01365685688022528, |
| "grad_norm": 0.3768156170845032, |
| "learning_rate": 9.579402579915187e-06, |
| "loss": 0.8363, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.01368603819834542, |
| "grad_norm": 0.38790008425712585, |
| "learning_rate": 9.577549744832952e-06, |
| "loss": 0.6479, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.013715219516465559, |
| "grad_norm": 0.36286771297454834, |
| "learning_rate": 9.575693017610118e-06, |
| "loss": 0.9808, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.013744400834585698, |
| "grad_norm": 0.6221984624862671, |
| "learning_rate": 9.573832399825403e-06, |
| "loss": 1.1708, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.013773582152705837, |
| "grad_norm": 0.37667420506477356, |
| "learning_rate": 9.571967893060827e-06, |
| "loss": 1.064, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.013802763470825978, |
| "grad_norm": 0.31978538632392883, |
| "learning_rate": 9.570099498901722e-06, |
| "loss": 0.7165, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.013831944788946117, |
| "grad_norm": 0.5492832064628601, |
| "learning_rate": 9.568227218936719e-06, |
| "loss": 0.9937, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.013861126107066257, |
| "grad_norm": 0.37476104497909546, |
| "learning_rate": 9.566351054757756e-06, |
| "loss": 0.6541, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.013890307425186396, |
| "grad_norm": 0.3576231002807617, |
| "learning_rate": 9.564471007960077e-06, |
| "loss": 0.7521, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.013919488743306535, |
| "grad_norm": 0.3468270003795624, |
| "learning_rate": 9.562587080142222e-06, |
| "loss": 1.16, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.013948670061426674, |
| "grad_norm": 0.3201936185359955, |
| "learning_rate": 9.560699272906034e-06, |
| "loss": 0.5604, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.013977851379546814, |
| "grad_norm": 0.3727940618991852, |
| "learning_rate": 9.558807587856654e-06, |
| "loss": 0.7413, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.014007032697666953, |
| "grad_norm": 0.3238644599914551, |
| "learning_rate": 9.55691202660252e-06, |
| "loss": 0.3379, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.014036214015787094, |
| "grad_norm": 0.35313352942466736, |
| "learning_rate": 9.555012590755364e-06, |
| "loss": 1.1398, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.014065395333907233, |
| "grad_norm": 0.42580607533454895, |
| "learning_rate": 9.553109281930213e-06, |
| "loss": 0.8138, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.014094576652027372, |
| "grad_norm": 0.35797998309135437, |
| "learning_rate": 9.551202101745394e-06, |
| "loss": 0.5217, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.014123757970147512, |
| "grad_norm": 0.5001800656318665, |
| "learning_rate": 9.549291051822513e-06, |
| "loss": 1.5534, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.01415293928826765, |
| "grad_norm": 0.35367849469184875, |
| "learning_rate": 9.547376133786476e-06, |
| "loss": 0.5845, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.01418212060638779, |
| "grad_norm": 0.3817846477031708, |
| "learning_rate": 9.545457349265478e-06, |
| "loss": 0.5564, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.01421130192450793, |
| "grad_norm": 0.40490975975990295, |
| "learning_rate": 9.543534699890996e-06, |
| "loss": 1.2813, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.01424048324262807, |
| "grad_norm": 0.3181605637073517, |
| "learning_rate": 9.541608187297793e-06, |
| "loss": 0.9636, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.01426966456074821, |
| "grad_norm": 0.32338184118270874, |
| "learning_rate": 9.539677813123923e-06, |
| "loss": 0.6198, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.014298845878868349, |
| "grad_norm": 0.3252333104610443, |
| "learning_rate": 9.53774357901072e-06, |
| "loss": 0.536, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.014328027196988488, |
| "grad_norm": 0.34557807445526123, |
| "learning_rate": 9.535805486602796e-06, |
| "loss": 0.3843, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.014357208515108627, |
| "grad_norm": 0.3492249548435211, |
| "learning_rate": 9.533863537548055e-06, |
| "loss": 0.4399, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.014386389833228767, |
| "grad_norm": 0.540067732334137, |
| "learning_rate": 9.531917733497665e-06, |
| "loss": 0.6152, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.014415571151348906, |
| "grad_norm": 0.29029080271720886, |
| "learning_rate": 9.529968076106082e-06, |
| "loss": 0.5746, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.014444752469469045, |
| "grad_norm": 0.36528632044792175, |
| "learning_rate": 9.528014567031039e-06, |
| "loss": 1.0559, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.014473933787589186, |
| "grad_norm": 0.46830108761787415, |
| "learning_rate": 9.526057207933537e-06, |
| "loss": 1.023, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.014503115105709325, |
| "grad_norm": 0.3530757427215576, |
| "learning_rate": 9.524096000477855e-06, |
| "loss": 1.1214, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.014532296423829465, |
| "grad_norm": 0.44631099700927734, |
| "learning_rate": 9.522130946331545e-06, |
| "loss": 0.6986, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.014561477741949604, |
| "grad_norm": 0.3615671992301941, |
| "learning_rate": 9.520162047165427e-06, |
| "loss": 0.6395, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.014590659060069743, |
| "grad_norm": 0.3598816990852356, |
| "learning_rate": 9.51818930465359e-06, |
| "loss": 0.6153, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.014619840378189882, |
| "grad_norm": 0.4001208245754242, |
| "learning_rate": 9.516212720473396e-06, |
| "loss": 0.5419, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.014649021696310022, |
| "grad_norm": 0.34737879037857056, |
| "learning_rate": 9.514232296305466e-06, |
| "loss": 1.1232, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.014678203014430161, |
| "grad_norm": 0.39558371901512146, |
| "learning_rate": 9.512248033833692e-06, |
| "loss": 1.5214, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.014707384332550302, |
| "grad_norm": 0.32832667231559753, |
| "learning_rate": 9.510259934745227e-06, |
| "loss": 0.5332, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.014736565650670441, |
| "grad_norm": 0.4458834230899811, |
| "learning_rate": 9.508268000730487e-06, |
| "loss": 0.5398, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.01476574696879058, |
| "grad_norm": 0.3687651455402374, |
| "learning_rate": 9.506272233483147e-06, |
| "loss": 0.5837, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.01479492828691072, |
| "grad_norm": 0.36100104451179504, |
| "learning_rate": 9.504272634700144e-06, |
| "loss": 0.5637, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.014824109605030859, |
| "grad_norm": 0.44393599033355713, |
| "learning_rate": 9.50226920608167e-06, |
| "loss": 0.806, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.014853290923150998, |
| "grad_norm": 0.350676953792572, |
| "learning_rate": 9.500261949331176e-06, |
| "loss": 0.6123, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.014882472241271138, |
| "grad_norm": 0.3527853488922119, |
| "learning_rate": 9.498250866155365e-06, |
| "loss": 0.8761, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.014911653559391278, |
| "grad_norm": 0.3412812650203705, |
| "learning_rate": 9.496235958264196e-06, |
| "loss": 0.5239, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.014940834877511418, |
| "grad_norm": 0.3844809830188751, |
| "learning_rate": 9.494217227370878e-06, |
| "loss": 0.6239, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.014970016195631557, |
| "grad_norm": 0.367498517036438, |
| "learning_rate": 9.492194675191874e-06, |
| "loss": 0.5984, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.014999197513751696, |
| "grad_norm": 0.3612300157546997, |
| "learning_rate": 9.490168303446894e-06, |
| "loss": 0.5987, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.015028378831871836, |
| "grad_norm": 0.3784268796443939, |
| "learning_rate": 9.488138113858894e-06, |
| "loss": 0.6424, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.015057560149991975, |
| "grad_norm": 0.39227673411369324, |
| "learning_rate": 9.486104108154078e-06, |
| "loss": 0.6675, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.015086741468112114, |
| "grad_norm": 0.39378803968429565, |
| "learning_rate": 9.484066288061894e-06, |
| "loss": 1.217, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.015115922786232253, |
| "grad_norm": 0.401193767786026, |
| "learning_rate": 9.482024655315033e-06, |
| "loss": 0.7383, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.015145104104352394, |
| "grad_norm": 0.3714485466480255, |
| "learning_rate": 9.47997921164943e-06, |
| "loss": 0.6934, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.015174285422472534, |
| "grad_norm": 0.36759766936302185, |
| "learning_rate": 9.47792995880426e-06, |
| "loss": 0.6782, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.015203466740592673, |
| "grad_norm": 0.28930333256721497, |
| "learning_rate": 9.475876898521933e-06, |
| "loss": 0.9872, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.015232648058712812, |
| "grad_norm": 0.37512558698654175, |
| "learning_rate": 9.4738200325481e-06, |
| "loss": 0.8655, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.015261829376832951, |
| "grad_norm": 0.3728006184101105, |
| "learning_rate": 9.471759362631645e-06, |
| "loss": 0.6292, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.01529101069495309, |
| "grad_norm": 0.33682775497436523, |
| "learning_rate": 9.469694890524693e-06, |
| "loss": 0.421, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.01532019201307323, |
| "grad_norm": 0.3591179847717285, |
| "learning_rate": 9.46762661798259e-06, |
| "loss": 0.5439, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.01534937333119337, |
| "grad_norm": 0.36836186051368713, |
| "learning_rate": 9.465554546763927e-06, |
| "loss": 0.6114, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.01537855464931351, |
| "grad_norm": 0.41732025146484375, |
| "learning_rate": 9.463478678630514e-06, |
| "loss": 0.5819, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.01540773596743365, |
| "grad_norm": 0.39521655440330505, |
| "learning_rate": 9.461399015347395e-06, |
| "loss": 0.7043, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.015436917285553789, |
| "grad_norm": 0.34663423895835876, |
| "learning_rate": 9.459315558682839e-06, |
| "loss": 0.5116, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.015466098603673928, |
| "grad_norm": 0.4674903452396393, |
| "learning_rate": 9.457228310408342e-06, |
| "loss": 0.8343, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.015495279921794067, |
| "grad_norm": 0.4569755494594574, |
| "learning_rate": 9.455137272298622e-06, |
| "loss": 1.3544, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.015524461239914206, |
| "grad_norm": 0.3249228894710541, |
| "learning_rate": 9.453042446131622e-06, |
| "loss": 0.5342, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.015553642558034346, |
| "grad_norm": 0.3393101990222931, |
| "learning_rate": 9.450943833688501e-06, |
| "loss": 0.7155, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.015582823876154487, |
| "grad_norm": 0.381415456533432, |
| "learning_rate": 9.448841436753644e-06, |
| "loss": 0.5812, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.015612005194274626, |
| "grad_norm": 0.6117174625396729, |
| "learning_rate": 9.446735257114647e-06, |
| "loss": 0.6074, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.015641186512394763, |
| "grad_norm": 0.37842848896980286, |
| "learning_rate": 9.444625296562326e-06, |
| "loss": 0.7285, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.015670367830514904, |
| "grad_norm": 0.330030232667923, |
| "learning_rate": 9.442511556890715e-06, |
| "loss": 0.5858, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.015699549148635045, |
| "grad_norm": 1.5041126012802124, |
| "learning_rate": 9.440394039897051e-06, |
| "loss": 1.4087, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.015728730466755183, |
| "grad_norm": 0.377453088760376, |
| "learning_rate": 9.438272747381795e-06, |
| "loss": 0.9429, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.015757911784875324, |
| "grad_norm": 0.33903029561042786, |
| "learning_rate": 9.436147681148612e-06, |
| "loss": 0.6086, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.01578709310299546, |
| "grad_norm": 0.3462194800376892, |
| "learning_rate": 9.434018843004373e-06, |
| "loss": 0.6849, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.015816274421115602, |
| "grad_norm": 0.3402860164642334, |
| "learning_rate": 9.431886234759163e-06, |
| "loss": 1.2328, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.01584545573923574, |
| "grad_norm": 0.4302612245082855, |
| "learning_rate": 9.429749858226265e-06, |
| "loss": 1.2947, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.01587463705735588, |
| "grad_norm": 0.35253986716270447, |
| "learning_rate": 9.427609715222175e-06, |
| "loss": 1.2404, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.015903818375476022, |
| "grad_norm": 0.33479875326156616, |
| "learning_rate": 9.425465807566581e-06, |
| "loss": 0.5581, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.01593299969359616, |
| "grad_norm": 0.5809159874916077, |
| "learning_rate": 9.42331813708238e-06, |
| "loss": 0.8207, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.0159621810117163, |
| "grad_norm": 0.5473012328147888, |
| "learning_rate": 9.421166705595668e-06, |
| "loss": 1.2136, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.015991362329836438, |
| "grad_norm": 0.3245270848274231, |
| "learning_rate": 9.419011514935733e-06, |
| "loss": 0.7202, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.01602054364795658, |
| "grad_norm": 0.3849169909954071, |
| "learning_rate": 9.416852566935066e-06, |
| "loss": 1.0979, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.016049724966076716, |
| "grad_norm": 0.46187546849250793, |
| "learning_rate": 9.414689863429347e-06, |
| "loss": 1.4207, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.016078906284196857, |
| "grad_norm": 0.37997981905937195, |
| "learning_rate": 9.412523406257454e-06, |
| "loss": 0.9302, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.016108087602316995, |
| "grad_norm": 0.3921058475971222, |
| "learning_rate": 9.410353197261453e-06, |
| "loss": 0.6097, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.016137268920437136, |
| "grad_norm": 0.31845617294311523, |
| "learning_rate": 9.408179238286604e-06, |
| "loss": 0.5447, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.016166450238557277, |
| "grad_norm": 0.3439517617225647, |
| "learning_rate": 9.406001531181351e-06, |
| "loss": 0.5904, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.016195631556677414, |
| "grad_norm": 0.37492233514785767, |
| "learning_rate": 9.403820077797328e-06, |
| "loss": 0.646, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.016224812874797555, |
| "grad_norm": 0.29898616671562195, |
| "learning_rate": 9.401634879989352e-06, |
| "loss": 0.9889, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.016253994192917693, |
| "grad_norm": 0.41377773880958557, |
| "learning_rate": 9.399445939615429e-06, |
| "loss": 0.6089, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.016283175511037834, |
| "grad_norm": 0.3867817521095276, |
| "learning_rate": 9.397253258536736e-06, |
| "loss": 1.226, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.01631235682915797, |
| "grad_norm": 0.48069629073143005, |
| "learning_rate": 9.395056838617645e-06, |
| "loss": 0.4995, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.016341538147278113, |
| "grad_norm": 0.5251441597938538, |
| "learning_rate": 9.392856681725697e-06, |
| "loss": 1.4195, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.016370719465398254, |
| "grad_norm": 0.29713502526283264, |
| "learning_rate": 9.390652789731614e-06, |
| "loss": 0.6608, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.01639990078351839, |
| "grad_norm": 0.32634323835372925, |
| "learning_rate": 9.388445164509292e-06, |
| "loss": 0.9315, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.016429082101638532, |
| "grad_norm": 0.3459722399711609, |
| "learning_rate": 9.386233807935802e-06, |
| "loss": 1.1045, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.01645826341975867, |
| "grad_norm": 0.3559359610080719, |
| "learning_rate": 9.384018721891391e-06, |
| "loss": 0.5996, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.01648744473787881, |
| "grad_norm": 0.32917076349258423, |
| "learning_rate": 9.381799908259473e-06, |
| "loss": 0.4981, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.016516626055998948, |
| "grad_norm": 0.3352927267551422, |
| "learning_rate": 9.37957736892663e-06, |
| "loss": 1.0454, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.01654580737411909, |
| "grad_norm": 0.3906092345714569, |
| "learning_rate": 9.377351105782618e-06, |
| "loss": 0.5554, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.01657498869223923, |
| "grad_norm": 0.34283196926116943, |
| "learning_rate": 9.375121120720352e-06, |
| "loss": 0.5886, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.016604170010359368, |
| "grad_norm": 0.43962612748146057, |
| "learning_rate": 9.372887415635919e-06, |
| "loss": 0.6909, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.01663335132847951, |
| "grad_norm": 0.378794401884079, |
| "learning_rate": 9.37064999242856e-06, |
| "loss": 1.3446, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.016662532646599646, |
| "grad_norm": 0.3872179090976715, |
| "learning_rate": 9.36840885300069e-06, |
| "loss": 0.6506, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.016691713964719787, |
| "grad_norm": 0.4583908021450043, |
| "learning_rate": 9.366163999257869e-06, |
| "loss": 0.6029, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.016720895282839925, |
| "grad_norm": 0.35887518525123596, |
| "learning_rate": 9.363915433108828e-06, |
| "loss": 1.2264, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.016750076600960066, |
| "grad_norm": 0.42287778854370117, |
| "learning_rate": 9.361663156465448e-06, |
| "loss": 0.8646, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.016779257919080207, |
| "grad_norm": 0.4488116204738617, |
| "learning_rate": 9.359407171242763e-06, |
| "loss": 0.6976, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.016808439237200344, |
| "grad_norm": 0.403698593378067, |
| "learning_rate": 9.357147479358968e-06, |
| "loss": 0.6684, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.016837620555320485, |
| "grad_norm": 0.31678903102874756, |
| "learning_rate": 9.3548840827354e-06, |
| "loss": 1.1962, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.016866801873440623, |
| "grad_norm": 0.37791451811790466, |
| "learning_rate": 9.352616983296557e-06, |
| "loss": 0.5818, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.016895983191560764, |
| "grad_norm": 0.4029865860939026, |
| "learning_rate": 9.350346182970072e-06, |
| "loss": 1.1947, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.0169251645096809, |
| "grad_norm": 0.4236256778240204, |
| "learning_rate": 9.34807168368674e-06, |
| "loss": 0.8075, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.016954345827801042, |
| "grad_norm": 0.3434098958969116, |
| "learning_rate": 9.345793487380485e-06, |
| "loss": 0.6306, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.01698352714592118, |
| "grad_norm": 0.44345423579216003, |
| "learning_rate": 9.34351159598839e-06, |
| "loss": 1.0305, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.01701270846404132, |
| "grad_norm": 0.362965852022171, |
| "learning_rate": 9.34122601145067e-06, |
| "loss": 0.5324, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.01704188978216146, |
| "grad_norm": 0.32774803042411804, |
| "learning_rate": 9.33893673571068e-06, |
| "loss": 0.5931, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.0170710711002816, |
| "grad_norm": 0.3581222891807556, |
| "learning_rate": 9.336643770714919e-06, |
| "loss": 0.6989, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.01710025241840174, |
| "grad_norm": 0.38386085629463196, |
| "learning_rate": 9.33434711841302e-06, |
| "loss": 0.4907, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.017129433736521878, |
| "grad_norm": 0.3206281065940857, |
| "learning_rate": 9.332046780757751e-06, |
| "loss": 0.5303, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.01715861505464202, |
| "grad_norm": 0.41021135449409485, |
| "learning_rate": 9.329742759705012e-06, |
| "loss": 1.0549, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.017187796372762156, |
| "grad_norm": 0.3143759071826935, |
| "learning_rate": 9.327435057213839e-06, |
| "loss": 0.596, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.017216977690882297, |
| "grad_norm": 0.5960498452186584, |
| "learning_rate": 9.325123675246395e-06, |
| "loss": 0.7148, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.017246159009002438, |
| "grad_norm": 0.3247166574001312, |
| "learning_rate": 9.32280861576797e-06, |
| "loss": 1.5049, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.017275340327122576, |
| "grad_norm": 0.3434315621852875, |
| "learning_rate": 9.320489880746988e-06, |
| "loss": 1.1187, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.017304521645242717, |
| "grad_norm": 0.3348698914051056, |
| "learning_rate": 9.31816747215499e-06, |
| "loss": 0.5795, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.017333702963362854, |
| "grad_norm": 0.36180034279823303, |
| "learning_rate": 9.315841391966645e-06, |
| "loss": 1.1146, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.017362884281482995, |
| "grad_norm": 0.3520008325576782, |
| "learning_rate": 9.313511642159743e-06, |
| "loss": 0.5485, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.017392065599603133, |
| "grad_norm": 0.37942084670066833, |
| "learning_rate": 9.311178224715193e-06, |
| "loss": 0.6103, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.017421246917723274, |
| "grad_norm": 0.37695202231407166, |
| "learning_rate": 9.308841141617023e-06, |
| "loss": 1.0229, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.017450428235843415, |
| "grad_norm": 0.3322283923625946, |
| "learning_rate": 9.30650039485238e-06, |
| "loss": 0.4187, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.017479609553963552, |
| "grad_norm": 0.370734840631485, |
| "learning_rate": 9.304155986411522e-06, |
| "loss": 0.5859, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.017508790872083693, |
| "grad_norm": 0.396634578704834, |
| "learning_rate": 9.301807918287826e-06, |
| "loss": 0.555, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.01753797219020383, |
| "grad_norm": 0.4389735162258148, |
| "learning_rate": 9.299456192477776e-06, |
| "loss": 0.3964, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.017567153508323972, |
| "grad_norm": 0.3570210933685303, |
| "learning_rate": 9.297100810980966e-06, |
| "loss": 0.5599, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.01759633482644411, |
| "grad_norm": 0.3596435785293579, |
| "learning_rate": 9.2947417758001e-06, |
| "loss": 0.5995, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.01762551614456425, |
| "grad_norm": 0.36702147126197815, |
| "learning_rate": 9.29237908894099e-06, |
| "loss": 1.3397, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.017654697462684388, |
| "grad_norm": 0.3350675404071808, |
| "learning_rate": 9.29001275241255e-06, |
| "loss": 0.5672, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.01768387878080453, |
| "grad_norm": 0.3603138029575348, |
| "learning_rate": 9.287642768226798e-06, |
| "loss": 1.1652, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.01771306009892467, |
| "grad_norm": 0.3201417028903961, |
| "learning_rate": 9.285269138398855e-06, |
| "loss": 1.1791, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.017742241417044807, |
| "grad_norm": 0.33644813299179077, |
| "learning_rate": 9.282891864946941e-06, |
| "loss": 0.4735, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.01777142273516495, |
| "grad_norm": 0.41625645756721497, |
| "learning_rate": 9.280510949892374e-06, |
| "loss": 1.2444, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.017800604053285086, |
| "grad_norm": 0.4573598802089691, |
| "learning_rate": 9.278126395259566e-06, |
| "loss": 0.9899, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.017829785371405227, |
| "grad_norm": 0.39679181575775146, |
| "learning_rate": 9.275738203076026e-06, |
| "loss": 0.7262, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.017858966689525364, |
| "grad_norm": 0.5160564184188843, |
| "learning_rate": 9.273346375372357e-06, |
| "loss": 1.7396, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.017888148007645505, |
| "grad_norm": 0.32972249388694763, |
| "learning_rate": 9.270950914182251e-06, |
| "loss": 0.5268, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.017917329325765646, |
| "grad_norm": 0.3377739489078522, |
| "learning_rate": 9.26855182154249e-06, |
| "loss": 0.6258, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.017946510643885784, |
| "grad_norm": 0.3465003967285156, |
| "learning_rate": 9.266149099492946e-06, |
| "loss": 0.4747, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.017975691962005925, |
| "grad_norm": 0.3364008963108063, |
| "learning_rate": 9.263742750076571e-06, |
| "loss": 0.6306, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.018004873280126062, |
| "grad_norm": 0.33820804953575134, |
| "learning_rate": 9.261332775339408e-06, |
| "loss": 0.4394, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.018034054598246203, |
| "grad_norm": 0.3506802022457123, |
| "learning_rate": 9.258919177330578e-06, |
| "loss": 0.488, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.01806323591636634, |
| "grad_norm": 0.43290603160858154, |
| "learning_rate": 9.256501958102287e-06, |
| "loss": 0.9228, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.018092417234486482, |
| "grad_norm": 0.3961278200149536, |
| "learning_rate": 9.254081119709813e-06, |
| "loss": 1.1781, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.018121598552606623, |
| "grad_norm": 0.29195985198020935, |
| "learning_rate": 9.251656664211519e-06, |
| "loss": 0.5739, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.01815077987072676, |
| "grad_norm": 0.32894715666770935, |
| "learning_rate": 9.249228593668838e-06, |
| "loss": 0.5459, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.0181799611888469, |
| "grad_norm": 0.35383424162864685, |
| "learning_rate": 9.24679691014628e-06, |
| "loss": 0.4321, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.01820914250696704, |
| "grad_norm": 0.3357163667678833, |
| "learning_rate": 9.244361615711428e-06, |
| "loss": 0.6703, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.01823832382508718, |
| "grad_norm": 0.3436160087585449, |
| "learning_rate": 9.241922712434928e-06, |
| "loss": 0.4953, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.018267505143207317, |
| "grad_norm": 0.3420107662677765, |
| "learning_rate": 9.239480202390504e-06, |
| "loss": 0.5976, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.01829668646132746, |
| "grad_norm": 0.3350810408592224, |
| "learning_rate": 9.237034087654941e-06, |
| "loss": 0.4893, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.0183258677794476, |
| "grad_norm": 0.3578342795372009, |
| "learning_rate": 9.234584370308089e-06, |
| "loss": 0.506, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.018355049097567737, |
| "grad_norm": 0.3859279751777649, |
| "learning_rate": 9.232131052432861e-06, |
| "loss": 0.9119, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.018384230415687878, |
| "grad_norm": 0.3894137144088745, |
| "learning_rate": 9.22967413611524e-06, |
| "loss": 0.7624, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.018413411733808015, |
| "grad_norm": 0.3441254794597626, |
| "learning_rate": 9.227213623444253e-06, |
| "loss": 1.0488, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.018442593051928156, |
| "grad_norm": 0.31893157958984375, |
| "learning_rate": 9.224749516512e-06, |
| "loss": 1.0957, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.018471774370048294, |
| "grad_norm": 0.3425973057746887, |
| "learning_rate": 9.222281817413622e-06, |
| "loss": 0.6559, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.018500955688168435, |
| "grad_norm": 0.4549978971481323, |
| "learning_rate": 9.219810528247332e-06, |
| "loss": 0.9401, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.018530137006288572, |
| "grad_norm": 0.41166195273399353, |
| "learning_rate": 9.217335651114384e-06, |
| "loss": 1.0252, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.018559318324408713, |
| "grad_norm": 0.37333980202674866, |
| "learning_rate": 9.21485718811908e-06, |
| "loss": 1.5015, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.018588499642528854, |
| "grad_norm": 0.3657039403915405, |
| "learning_rate": 9.212375141368779e-06, |
| "loss": 0.5507, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.018617680960648992, |
| "grad_norm": 0.4299660623073578, |
| "learning_rate": 9.209889512973884e-06, |
| "loss": 0.6796, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.018646862278769133, |
| "grad_norm": 0.35656315088272095, |
| "learning_rate": 9.207400305047846e-06, |
| "loss": 0.4611, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.01867604359688927, |
| "grad_norm": 0.38147053122520447, |
| "learning_rate": 9.204907519707151e-06, |
| "loss": 0.6197, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.01870522491500941, |
| "grad_norm": 0.3152889311313629, |
| "learning_rate": 9.20241115907134e-06, |
| "loss": 0.7449, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.01873440623312955, |
| "grad_norm": 0.467544823884964, |
| "learning_rate": 9.199911225262981e-06, |
| "loss": 1.8084, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.01876358755124969, |
| "grad_norm": 0.3269745111465454, |
| "learning_rate": 9.197407720407687e-06, |
| "loss": 1.1264, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.01879276886936983, |
| "grad_norm": 0.36698785424232483, |
| "learning_rate": 9.194900646634107e-06, |
| "loss": 0.6801, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.01882195018748997, |
| "grad_norm": 0.32644808292388916, |
| "learning_rate": 9.192390006073924e-06, |
| "loss": 0.4864, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.01885113150561011, |
| "grad_norm": 0.458908349275589, |
| "learning_rate": 9.189875800861854e-06, |
| "loss": 0.8734, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.018880312823730247, |
| "grad_norm": 0.3606187105178833, |
| "learning_rate": 9.18735803313564e-06, |
| "loss": 1.1729, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.018909494141850388, |
| "grad_norm": 0.37290409207344055, |
| "learning_rate": 9.184836705036062e-06, |
| "loss": 0.6501, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.018938675459970526, |
| "grad_norm": 0.36168158054351807, |
| "learning_rate": 9.182311818706919e-06, |
| "loss": 1.0998, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.018967856778090667, |
| "grad_norm": 0.392546683549881, |
| "learning_rate": 9.179783376295042e-06, |
| "loss": 0.6509, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.018997038096210807, |
| "grad_norm": 0.4019841253757477, |
| "learning_rate": 9.17725137995028e-06, |
| "loss": 0.5996, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.019026219414330945, |
| "grad_norm": 0.3839268982410431, |
| "learning_rate": 9.174715831825507e-06, |
| "loss": 0.7207, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.019055400732451086, |
| "grad_norm": 0.4698973000049591, |
| "learning_rate": 9.17217673407662e-06, |
| "loss": 1.9878, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.019084582050571224, |
| "grad_norm": 0.37389200925827026, |
| "learning_rate": 9.169634088862527e-06, |
| "loss": 0.3477, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.019113763368691365, |
| "grad_norm": 0.5314019918441772, |
| "learning_rate": 9.167087898345156e-06, |
| "loss": 1.0018, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.019142944686811502, |
| "grad_norm": 0.39931797981262207, |
| "learning_rate": 9.164538164689452e-06, |
| "loss": 0.6502, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.019172126004931643, |
| "grad_norm": 0.3246656358242035, |
| "learning_rate": 9.161984890063367e-06, |
| "loss": 0.5259, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.01920130732305178, |
| "grad_norm": 0.3903149366378784, |
| "learning_rate": 9.159428076637868e-06, |
| "loss": 1.0821, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.01923048864117192, |
| "grad_norm": 0.39948946237564087, |
| "learning_rate": 9.15686772658693e-06, |
| "loss": 0.4597, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.019259669959292063, |
| "grad_norm": 0.36726510524749756, |
| "learning_rate": 9.154303842087535e-06, |
| "loss": 0.7144, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.0192888512774122, |
| "grad_norm": 0.3669425845146179, |
| "learning_rate": 9.151736425319669e-06, |
| "loss": 1.2843, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.01931803259553234, |
| "grad_norm": 0.3675820827484131, |
| "learning_rate": 9.149165478466325e-06, |
| "loss": 0.5808, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.01934721391365248, |
| "grad_norm": 0.3312235474586487, |
| "learning_rate": 9.14659100371349e-06, |
| "loss": 0.5777, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.01937639523177262, |
| "grad_norm": 0.3757508099079132, |
| "learning_rate": 9.14401300325016e-06, |
| "loss": 0.5729, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.019405576549892757, |
| "grad_norm": 0.3261760175228119, |
| "learning_rate": 9.141431479268323e-06, |
| "loss": 0.5035, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.019434757868012898, |
| "grad_norm": 0.4490210711956024, |
| "learning_rate": 9.138846433962962e-06, |
| "loss": 0.9402, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.01946393918613304, |
| "grad_norm": 0.4542470872402191, |
| "learning_rate": 9.136257869532062e-06, |
| "loss": 0.8471, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.019493120504253177, |
| "grad_norm": 0.34423303604125977, |
| "learning_rate": 9.133665788176588e-06, |
| "loss": 0.4733, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.019522301822373318, |
| "grad_norm": 0.32796233892440796, |
| "learning_rate": 9.131070192100506e-06, |
| "loss": 0.5029, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.019551483140493455, |
| "grad_norm": 0.3568125069141388, |
| "learning_rate": 9.128471083510764e-06, |
| "loss": 0.7389, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.019580664458613596, |
| "grad_norm": 0.33445391058921814, |
| "learning_rate": 9.1258684646173e-06, |
| "loss": 1.0303, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.019609845776733734, |
| "grad_norm": 0.36570268869400024, |
| "learning_rate": 9.123262337633037e-06, |
| "loss": 0.9279, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.019639027094853875, |
| "grad_norm": 0.45482945442199707, |
| "learning_rate": 9.120652704773877e-06, |
| "loss": 0.4813, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.019668208412974016, |
| "grad_norm": 0.307086706161499, |
| "learning_rate": 9.118039568258707e-06, |
| "loss": 1.0467, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.019697389731094153, |
| "grad_norm": 0.3791542649269104, |
| "learning_rate": 9.11542293030939e-06, |
| "loss": 0.6916, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.019726571049214294, |
| "grad_norm": 0.349894642829895, |
| "learning_rate": 9.112802793150768e-06, |
| "loss": 0.565, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.01975575236733443, |
| "grad_norm": 0.3533949851989746, |
| "learning_rate": 9.11017915901066e-06, |
| "loss": 0.6122, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.019784933685454573, |
| "grad_norm": 0.29272642731666565, |
| "learning_rate": 9.107552030119852e-06, |
| "loss": 0.4065, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.01981411500357471, |
| "grad_norm": 0.33422765135765076, |
| "learning_rate": 9.104921408712109e-06, |
| "loss": 1.0776, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.01984329632169485, |
| "grad_norm": 0.38381484150886536, |
| "learning_rate": 9.10228729702416e-06, |
| "loss": 0.6844, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.019872477639814992, |
| "grad_norm": 0.3098258376121521, |
| "learning_rate": 9.099649697295705e-06, |
| "loss": 0.4116, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.01990165895793513, |
| "grad_norm": 0.26194438338279724, |
| "learning_rate": 9.097008611769409e-06, |
| "loss": 0.374, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.01993084027605527, |
| "grad_norm": 0.4125596582889557, |
| "learning_rate": 9.0943640426909e-06, |
| "loss": 1.1768, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.019960021594175408, |
| "grad_norm": 0.32758772373199463, |
| "learning_rate": 9.09171599230877e-06, |
| "loss": 0.9924, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.01998920291229555, |
| "grad_norm": 0.3490470051765442, |
| "learning_rate": 9.089064462874567e-06, |
| "loss": 0.553, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.020018384230415687, |
| "grad_norm": 0.3255799412727356, |
| "learning_rate": 9.0864094566428e-06, |
| "loss": 1.2142, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.020047565548535828, |
| "grad_norm": 0.3007728159427643, |
| "learning_rate": 9.083750975870934e-06, |
| "loss": 1.0844, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.020076746866655965, |
| "grad_norm": 0.32895627617836, |
| "learning_rate": 9.08108902281939e-06, |
| "loss": 1.1762, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.020105928184776106, |
| "grad_norm": 0.38072332739830017, |
| "learning_rate": 9.078423599751535e-06, |
| "loss": 1.1305, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.020135109502896247, |
| "grad_norm": 0.3886689841747284, |
| "learning_rate": 9.075754708933698e-06, |
| "loss": 0.634, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.020164290821016385, |
| "grad_norm": 0.33491384983062744, |
| "learning_rate": 9.073082352635143e-06, |
| "loss": 1.0068, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.020193472139136526, |
| "grad_norm": 0.41087162494659424, |
| "learning_rate": 9.07040653312809e-06, |
| "loss": 0.8065, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.020222653457256663, |
| "grad_norm": 0.3778280019760132, |
| "learning_rate": 9.0677272526877e-06, |
| "loss": 0.5152, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.020251834775376804, |
| "grad_norm": 0.25806868076324463, |
| "learning_rate": 9.065044513592079e-06, |
| "loss": 0.5269, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.020281016093496942, |
| "grad_norm": 0.3023952841758728, |
| "learning_rate": 9.062358318122268e-06, |
| "loss": 0.3871, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.020310197411617083, |
| "grad_norm": 0.41993364691734314, |
| "learning_rate": 9.059668668562256e-06, |
| "loss": 2.1125, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.020339378729737224, |
| "grad_norm": 0.3693355619907379, |
| "learning_rate": 9.056975567198962e-06, |
| "loss": 0.4642, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.02036856004785736, |
| "grad_norm": 0.39784207940101624, |
| "learning_rate": 9.05427901632224e-06, |
| "loss": 0.698, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.020397741365977502, |
| "grad_norm": 0.3644876778125763, |
| "learning_rate": 9.05157901822488e-06, |
| "loss": 0.6611, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.02042692268409764, |
| "grad_norm": 0.3227871060371399, |
| "learning_rate": 9.048875575202603e-06, |
| "loss": 0.4825, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.02045610400221778, |
| "grad_norm": 0.35657012462615967, |
| "learning_rate": 9.046168689554056e-06, |
| "loss": 0.5982, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.02048528532033792, |
| "grad_norm": 0.36145198345184326, |
| "learning_rate": 9.04345836358082e-06, |
| "loss": 0.6323, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.02051446663845806, |
| "grad_norm": 0.3020861744880676, |
| "learning_rate": 9.04074459958739e-06, |
| "loss": 0.3481, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.0205436479565782, |
| "grad_norm": 0.3794925808906555, |
| "learning_rate": 9.038027399881193e-06, |
| "loss": 1.6229, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.020572829274698338, |
| "grad_norm": 0.45398277044296265, |
| "learning_rate": 9.035306766772575e-06, |
| "loss": 0.8658, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.02060201059281848, |
| "grad_norm": 0.3466812074184418, |
| "learning_rate": 9.032582702574805e-06, |
| "loss": 1.6428, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.020631191910938616, |
| "grad_norm": 0.3583056330680847, |
| "learning_rate": 9.02985520960406e-06, |
| "loss": 0.6126, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.020660373229058757, |
| "grad_norm": 0.36726686358451843, |
| "learning_rate": 9.027124290179442e-06, |
| "loss": 0.6597, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.020689554547178895, |
| "grad_norm": 0.4753471314907074, |
| "learning_rate": 9.024389946622957e-06, |
| "loss": 1.0582, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.020718735865299036, |
| "grad_norm": 0.4432185888290405, |
| "learning_rate": 9.021652181259532e-06, |
| "loss": 0.7266, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.020747917183419173, |
| "grad_norm": 0.3590591549873352, |
| "learning_rate": 9.018910996417001e-06, |
| "loss": 0.5362, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.020777098501539314, |
| "grad_norm": 0.33765435218811035, |
| "learning_rate": 9.0161663944261e-06, |
| "loss": 0.542, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.020806279819659455, |
| "grad_norm": 0.3730981647968292, |
| "learning_rate": 9.013418377620475e-06, |
| "loss": 0.7046, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.020835461137779593, |
| "grad_norm": 0.30674096941947937, |
| "learning_rate": 9.010666948336674e-06, |
| "loss": 0.4224, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.020864642455899734, |
| "grad_norm": 0.4588209092617035, |
| "learning_rate": 9.00791210891415e-06, |
| "loss": 1.8732, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.02089382377401987, |
| "grad_norm": 0.31421759724617004, |
| "learning_rate": 9.005153861695248e-06, |
| "loss": 1.0239, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.020923005092140012, |
| "grad_norm": 0.3457552194595337, |
| "learning_rate": 9.00239220902522e-06, |
| "loss": 0.5863, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.02095218641026015, |
| "grad_norm": 0.30695098638534546, |
| "learning_rate": 8.999627153252207e-06, |
| "loss": 0.382, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.02098136772838029, |
| "grad_norm": 0.3320595920085907, |
| "learning_rate": 8.996858696727244e-06, |
| "loss": 1.1527, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.021010549046500432, |
| "grad_norm": 0.3089519143104553, |
| "learning_rate": 8.994086841804261e-06, |
| "loss": 1.1181, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.02103973036462057, |
| "grad_norm": 0.36267155408859253, |
| "learning_rate": 8.991311590840075e-06, |
| "loss": 0.6898, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.02106891168274071, |
| "grad_norm": 0.29903867840766907, |
| "learning_rate": 8.988532946194391e-06, |
| "loss": 0.4089, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.021098093000860848, |
| "grad_norm": 0.40361472964286804, |
| "learning_rate": 8.985750910229799e-06, |
| "loss": 0.5998, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.02112727431898099, |
| "grad_norm": 0.33275106549263, |
| "learning_rate": 8.982965485311772e-06, |
| "loss": 0.8022, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.021156455637101126, |
| "grad_norm": 0.4993535876274109, |
| "learning_rate": 8.980176673808665e-06, |
| "loss": 0.5456, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.021185636955221267, |
| "grad_norm": 0.3588978052139282, |
| "learning_rate": 8.977384478091717e-06, |
| "loss": 0.4851, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.02121481827334141, |
| "grad_norm": 0.32795020937919617, |
| "learning_rate": 8.974588900535039e-06, |
| "loss": 0.4727, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.021243999591461546, |
| "grad_norm": 0.3253801465034485, |
| "learning_rate": 8.971789943515617e-06, |
| "loss": 0.5685, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.021273180909581687, |
| "grad_norm": 0.3399464190006256, |
| "learning_rate": 8.968987609413313e-06, |
| "loss": 0.8761, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.021302362227701824, |
| "grad_norm": 0.4252973198890686, |
| "learning_rate": 8.966181900610861e-06, |
| "loss": 0.5743, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.021331543545821965, |
| "grad_norm": 0.34016600251197815, |
| "learning_rate": 8.963372819493864e-06, |
| "loss": 0.7273, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.021360724863942103, |
| "grad_norm": 0.36940544843673706, |
| "learning_rate": 8.960560368450791e-06, |
| "loss": 0.6165, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.021389906182062244, |
| "grad_norm": 0.34300991892814636, |
| "learning_rate": 8.957744549872977e-06, |
| "loss": 0.8254, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.021419087500182385, |
| "grad_norm": 0.41706210374832153, |
| "learning_rate": 8.95492536615462e-06, |
| "loss": 0.8383, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.021448268818302522, |
| "grad_norm": 0.3056439161300659, |
| "learning_rate": 8.952102819692776e-06, |
| "loss": 0.5115, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.021477450136422663, |
| "grad_norm": 0.3283182680606842, |
| "learning_rate": 8.94927691288737e-06, |
| "loss": 1.145, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.0215066314545428, |
| "grad_norm": 0.33984071016311646, |
| "learning_rate": 8.946447648141175e-06, |
| "loss": 0.3876, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.021535812772662942, |
| "grad_norm": 0.38997402787208557, |
| "learning_rate": 8.943615027859823e-06, |
| "loss": 1.0404, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.02156499409078308, |
| "grad_norm": 0.4195142984390259, |
| "learning_rate": 8.940779054451796e-06, |
| "loss": 0.4995, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.02159417540890322, |
| "grad_norm": 0.31568676233291626, |
| "learning_rate": 8.93793973032843e-06, |
| "loss": 0.4636, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.021623356727023358, |
| "grad_norm": 0.3206363022327423, |
| "learning_rate": 8.935097057903911e-06, |
| "loss": 0.4516, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.0216525380451435, |
| "grad_norm": 0.4849179685115814, |
| "learning_rate": 8.932251039595272e-06, |
| "loss": 0.7088, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.02168171936326364, |
| "grad_norm": 0.37452441453933716, |
| "learning_rate": 8.929401677822384e-06, |
| "loss": 0.4801, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.021710900681383778, |
| "grad_norm": 0.46993106603622437, |
| "learning_rate": 8.92654897500797e-06, |
| "loss": 0.5063, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.02174008199950392, |
| "grad_norm": 0.3480769693851471, |
| "learning_rate": 8.923692933577587e-06, |
| "loss": 1.0598, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.021769263317624056, |
| "grad_norm": 0.37334728240966797, |
| "learning_rate": 8.920833555959636e-06, |
| "loss": 0.6099, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.021798444635744197, |
| "grad_norm": 0.3175983428955078, |
| "learning_rate": 8.917970844585349e-06, |
| "loss": 0.6018, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.021827625953864335, |
| "grad_norm": 0.3213447630405426, |
| "learning_rate": 8.915104801888798e-06, |
| "loss": 1.2151, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.021856807271984476, |
| "grad_norm": 0.3635963797569275, |
| "learning_rate": 8.912235430306888e-06, |
| "loss": 0.5717, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.021885988590104617, |
| "grad_norm": 0.3699709177017212, |
| "learning_rate": 8.909362732279345e-06, |
| "loss": 0.6317, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.021915169908224754, |
| "grad_norm": 0.4695335626602173, |
| "learning_rate": 8.906486710248734e-06, |
| "loss": 1.2876, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.021944351226344895, |
| "grad_norm": 0.5570040345191956, |
| "learning_rate": 8.903607366660442e-06, |
| "loss": 0.7603, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.021973532544465033, |
| "grad_norm": 0.4375017285346985, |
| "learning_rate": 8.900724703962682e-06, |
| "loss": 0.8908, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.022002713862585174, |
| "grad_norm": 0.6062150001525879, |
| "learning_rate": 8.897838724606485e-06, |
| "loss": 1.8548, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.02203189518070531, |
| "grad_norm": 0.3286692798137665, |
| "learning_rate": 8.894949431045705e-06, |
| "loss": 0.5127, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.022061076498825452, |
| "grad_norm": 0.4079289734363556, |
| "learning_rate": 8.892056825737015e-06, |
| "loss": 0.7608, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.022090257816945593, |
| "grad_norm": 0.30061861872673035, |
| "learning_rate": 8.8891609111399e-06, |
| "loss": 0.3975, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.02211943913506573, |
| "grad_norm": 0.6648856401443481, |
| "learning_rate": 8.886261689716667e-06, |
| "loss": 0.575, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.02214862045318587, |
| "grad_norm": 0.43923717737197876, |
| "learning_rate": 8.883359163932422e-06, |
| "loss": 1.3487, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.02217780177130601, |
| "grad_norm": 0.3591349124908447, |
| "learning_rate": 8.880453336255093e-06, |
| "loss": 1.1973, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.02220698308942615, |
| "grad_norm": 0.3670094907283783, |
| "learning_rate": 8.877544209155406e-06, |
| "loss": 0.7514, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.022236164407546288, |
| "grad_norm": 0.3374556601047516, |
| "learning_rate": 8.8746317851069e-06, |
| "loss": 0.4059, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.02226534572566643, |
| "grad_norm": 0.34597352147102356, |
| "learning_rate": 8.871716066585911e-06, |
| "loss": 0.4653, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.022294527043786566, |
| "grad_norm": 0.4421803951263428, |
| "learning_rate": 8.868797056071581e-06, |
| "loss": 0.999, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.022323708361906707, |
| "grad_norm": 0.39163121581077576, |
| "learning_rate": 8.865874756045849e-06, |
| "loss": 0.6577, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.022352889680026848, |
| "grad_norm": 0.38601264357566833, |
| "learning_rate": 8.86294916899345e-06, |
| "loss": 0.6135, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.022382070998146986, |
| "grad_norm": 0.3637295961380005, |
| "learning_rate": 8.860020297401912e-06, |
| "loss": 0.7322, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.022411252316267127, |
| "grad_norm": 0.3655218183994293, |
| "learning_rate": 8.857088143761563e-06, |
| "loss": 0.9147, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.022440433634387264, |
| "grad_norm": 0.34286636114120483, |
| "learning_rate": 8.854152710565517e-06, |
| "loss": 0.4775, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.022469614952507405, |
| "grad_norm": 0.40919309854507446, |
| "learning_rate": 8.851214000309674e-06, |
| "loss": 1.642, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.022498796270627543, |
| "grad_norm": 0.3900407552719116, |
| "learning_rate": 8.848272015492725e-06, |
| "loss": 0.6516, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.022527977588747684, |
| "grad_norm": 0.4155939221382141, |
| "learning_rate": 8.845326758616144e-06, |
| "loss": 0.4094, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.022557158906867825, |
| "grad_norm": 1.441678524017334, |
| "learning_rate": 8.842378232184184e-06, |
| "loss": 0.9085, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.022586340224987962, |
| "grad_norm": 0.5478386878967285, |
| "learning_rate": 8.839426438703881e-06, |
| "loss": 1.386, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.022615521543108103, |
| "grad_norm": 0.41432473063468933, |
| "learning_rate": 8.83647138068505e-06, |
| "loss": 0.6071, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.02264470286122824, |
| "grad_norm": 0.4197017252445221, |
| "learning_rate": 8.833513060640278e-06, |
| "loss": 0.6876, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.02267388417934838, |
| "grad_norm": 0.3203357756137848, |
| "learning_rate": 8.83055148108493e-06, |
| "loss": 0.5771, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.02270306549746852, |
| "grad_norm": 0.4171014726161957, |
| "learning_rate": 8.827586644537138e-06, |
| "loss": 1.1253, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.02273224681558866, |
| "grad_norm": 0.40273237228393555, |
| "learning_rate": 8.824618553517806e-06, |
| "loss": 0.7336, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.0227614281337088, |
| "grad_norm": 0.3011469542980194, |
| "learning_rate": 8.821647210550604e-06, |
| "loss": 0.3521, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.02279060945182894, |
| "grad_norm": 0.41459038853645325, |
| "learning_rate": 8.818672618161969e-06, |
| "loss": 1.8689, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.02281979076994908, |
| "grad_norm": 0.35542216897010803, |
| "learning_rate": 8.8156947788811e-06, |
| "loss": 0.5669, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.022848972088069217, |
| "grad_norm": 0.29437392950057983, |
| "learning_rate": 8.812713695239952e-06, |
| "loss": 0.4459, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.022878153406189358, |
| "grad_norm": 0.4285639822483063, |
| "learning_rate": 8.809729369773247e-06, |
| "loss": 0.8242, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.022907334724309496, |
| "grad_norm": 0.5871285796165466, |
| "learning_rate": 8.806741805018457e-06, |
| "loss": 0.5645, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.022936516042429637, |
| "grad_norm": 0.39298170804977417, |
| "learning_rate": 8.803751003515813e-06, |
| "loss": 0.693, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.022965697360549778, |
| "grad_norm": 0.3813253343105316, |
| "learning_rate": 8.800756967808292e-06, |
| "loss": 0.4817, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.022994878678669915, |
| "grad_norm": 0.3398251235485077, |
| "learning_rate": 8.797759700441628e-06, |
| "loss": 1.1011, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.023024059996790056, |
| "grad_norm": 0.3503631353378296, |
| "learning_rate": 8.794759203964297e-06, |
| "loss": 0.597, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.023053241314910194, |
| "grad_norm": 0.37430325150489807, |
| "learning_rate": 8.791755480927524e-06, |
| "loss": 0.5188, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.023082422633030335, |
| "grad_norm": 0.4173000156879425, |
| "learning_rate": 8.788748533885276e-06, |
| "loss": 0.6229, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.023111603951150472, |
| "grad_norm": 0.35245805978775024, |
| "learning_rate": 8.78573836539426e-06, |
| "loss": 0.4719, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.023140785269270613, |
| "grad_norm": 0.34969428181648254, |
| "learning_rate": 8.782724978013926e-06, |
| "loss": 0.6204, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.02316996658739075, |
| "grad_norm": 0.6185358166694641, |
| "learning_rate": 8.779708374306457e-06, |
| "loss": 0.9829, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.023199147905510892, |
| "grad_norm": 0.5319929122924805, |
| "learning_rate": 8.776688556836772e-06, |
| "loss": 1.6485, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.023228329223631033, |
| "grad_norm": 0.3248120844364166, |
| "learning_rate": 8.773665528172525e-06, |
| "loss": 1.1377, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.02325751054175117, |
| "grad_norm": 0.3441081941127777, |
| "learning_rate": 8.770639290884098e-06, |
| "loss": 0.4342, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.02328669185987131, |
| "grad_norm": 0.3412538170814514, |
| "learning_rate": 8.767609847544598e-06, |
| "loss": 0.6642, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.02331587317799145, |
| "grad_norm": 0.36332061886787415, |
| "learning_rate": 8.764577200729864e-06, |
| "loss": 1.0804, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.02334505449611159, |
| "grad_norm": 0.35930174589157104, |
| "learning_rate": 8.76154135301846e-06, |
| "loss": 0.3421, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.023374235814231727, |
| "grad_norm": 0.4602378308773041, |
| "learning_rate": 8.75850230699166e-06, |
| "loss": 0.8681, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.02340341713235187, |
| "grad_norm": 0.34386616945266724, |
| "learning_rate": 8.75546006523347e-06, |
| "loss": 0.5401, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.02343259845047201, |
| "grad_norm": 0.3538525402545929, |
| "learning_rate": 8.752414630330607e-06, |
| "loss": 0.478, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.023461779768592147, |
| "grad_norm": 0.47124069929122925, |
| "learning_rate": 8.749366004872508e-06, |
| "loss": 0.7634, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.023490961086712288, |
| "grad_norm": 0.37605535984039307, |
| "learning_rate": 8.746314191451314e-06, |
| "loss": 0.6628, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.023520142404832425, |
| "grad_norm": 0.35266220569610596, |
| "learning_rate": 8.743259192661887e-06, |
| "loss": 0.6088, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.023549323722952566, |
| "grad_norm": 0.3753415644168854, |
| "learning_rate": 8.740201011101787e-06, |
| "loss": 0.656, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.023578505041072704, |
| "grad_norm": 0.3856053054332733, |
| "learning_rate": 8.73713964937129e-06, |
| "loss": 0.8337, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.023607686359192845, |
| "grad_norm": 0.30175161361694336, |
| "learning_rate": 8.734075110073371e-06, |
| "loss": 0.4284, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.023636867677312986, |
| "grad_norm": 0.40826019644737244, |
| "learning_rate": 8.731007395813706e-06, |
| "loss": 1.1045, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.023666048995433123, |
| "grad_norm": 0.3330915570259094, |
| "learning_rate": 8.727936509200672e-06, |
| "loss": 0.6175, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.023695230313553264, |
| "grad_norm": 0.43420398235321045, |
| "learning_rate": 8.724862452845345e-06, |
| "loss": 1.8371, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.023724411631673402, |
| "grad_norm": 0.4110957086086273, |
| "learning_rate": 8.721785229361493e-06, |
| "loss": 1.7118, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.023753592949793543, |
| "grad_norm": 0.41805300116539, |
| "learning_rate": 8.718704841365578e-06, |
| "loss": 1.2369, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.02378277426791368, |
| "grad_norm": 0.32465025782585144, |
| "learning_rate": 8.715621291476754e-06, |
| "loss": 0.5945, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.02381195558603382, |
| "grad_norm": 0.36631637811660767, |
| "learning_rate": 8.712534582316862e-06, |
| "loss": 0.63, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.02384113690415396, |
| "grad_norm": 0.3308427631855011, |
| "learning_rate": 8.709444716510429e-06, |
| "loss": 0.736, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.0238703182222741, |
| "grad_norm": 0.3967353403568268, |
| "learning_rate": 8.706351696684668e-06, |
| "loss": 0.798, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.02389949954039424, |
| "grad_norm": 0.4291444718837738, |
| "learning_rate": 8.703255525469471e-06, |
| "loss": 0.6946, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.02392868085851438, |
| "grad_norm": 0.3692452311515808, |
| "learning_rate": 8.700156205497415e-06, |
| "loss": 1.7867, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.02395786217663452, |
| "grad_norm": 0.36359903216362, |
| "learning_rate": 8.697053739403742e-06, |
| "loss": 0.5031, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.023987043494754657, |
| "grad_norm": 0.303137868642807, |
| "learning_rate": 8.693948129826386e-06, |
| "loss": 0.5001, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.024016224812874798, |
| "grad_norm": 0.37102386355400085, |
| "learning_rate": 8.69083937940594e-06, |
| "loss": 0.6871, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.024045406130994935, |
| "grad_norm": 0.3570660650730133, |
| "learning_rate": 8.687727490785676e-06, |
| "loss": 1.2201, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.024074587449115076, |
| "grad_norm": 0.40938523411750793, |
| "learning_rate": 8.684612466611526e-06, |
| "loss": 0.7307, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.024103768767235217, |
| "grad_norm": 0.34717899560928345, |
| "learning_rate": 8.681494309532099e-06, |
| "loss": 0.5842, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.024132950085355355, |
| "grad_norm": 0.3342737555503845, |
| "learning_rate": 8.678373022198656e-06, |
| "loss": 0.6426, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.024162131403475496, |
| "grad_norm": 0.32655027508735657, |
| "learning_rate": 8.675248607265129e-06, |
| "loss": 0.5673, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.024191312721595633, |
| "grad_norm": 0.6058974266052246, |
| "learning_rate": 8.672121067388107e-06, |
| "loss": 1.8147, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.024220494039715774, |
| "grad_norm": 0.3183436691761017, |
| "learning_rate": 8.668990405226832e-06, |
| "loss": 1.0557, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.024249675357835912, |
| "grad_norm": 0.31138086318969727, |
| "learning_rate": 8.665856623443203e-06, |
| "loss": 0.366, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.024278856675956053, |
| "grad_norm": 0.3633839190006256, |
| "learning_rate": 8.662719724701772e-06, |
| "loss": 1.3508, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.024308037994076194, |
| "grad_norm": 0.3213428854942322, |
| "learning_rate": 8.659579711669744e-06, |
| "loss": 0.4543, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.02433721931219633, |
| "grad_norm": 0.39051470160484314, |
| "learning_rate": 8.656436587016967e-06, |
| "loss": 0.8104, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.024366400630316472, |
| "grad_norm": 0.31337258219718933, |
| "learning_rate": 8.653290353415937e-06, |
| "loss": 0.4357, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.02439558194843661, |
| "grad_norm": 0.33449888229370117, |
| "learning_rate": 8.650141013541794e-06, |
| "loss": 0.6033, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.02442476326655675, |
| "grad_norm": 0.36728617548942566, |
| "learning_rate": 8.64698857007232e-06, |
| "loss": 0.6555, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.02445394458467689, |
| "grad_norm": 0.36893102526664734, |
| "learning_rate": 8.643833025687931e-06, |
| "loss": 1.1632, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.02448312590279703, |
| "grad_norm": 0.3861575126647949, |
| "learning_rate": 8.640674383071686e-06, |
| "loss": 0.6429, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.02451230722091717, |
| "grad_norm": 0.3969190716743469, |
| "learning_rate": 8.637512644909274e-06, |
| "loss": 1.0244, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.024541488539037308, |
| "grad_norm": 0.4696947932243347, |
| "learning_rate": 8.634347813889017e-06, |
| "loss": 0.8897, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.02457066985715745, |
| "grad_norm": 0.3451467454433441, |
| "learning_rate": 8.631179892701868e-06, |
| "loss": 0.743, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.024599851175277587, |
| "grad_norm": 0.43549051880836487, |
| "learning_rate": 8.628008884041405e-06, |
| "loss": 1.329, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.024629032493397728, |
| "grad_norm": 0.34361088275909424, |
| "learning_rate": 8.624834790603835e-06, |
| "loss": 0.4077, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.024658213811517865, |
| "grad_norm": 0.3678019940853119, |
| "learning_rate": 8.621657615087986e-06, |
| "loss": 0.6677, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.024687395129638006, |
| "grad_norm": 0.32565921545028687, |
| "learning_rate": 8.618477360195304e-06, |
| "loss": 1.1953, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.024716576447758144, |
| "grad_norm": 0.34741654992103577, |
| "learning_rate": 8.615294028629857e-06, |
| "loss": 1.1094, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.024745757765878285, |
| "grad_norm": 0.3462331295013428, |
| "learning_rate": 8.612107623098326e-06, |
| "loss": 0.6798, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.024774939083998426, |
| "grad_norm": 0.3555474281311035, |
| "learning_rate": 8.60891814631001e-06, |
| "loss": 0.5952, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.024804120402118563, |
| "grad_norm": 0.36785125732421875, |
| "learning_rate": 8.605725600976813e-06, |
| "loss": 0.4806, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.024833301720238704, |
| "grad_norm": 0.35969629883766174, |
| "learning_rate": 8.602529989813255e-06, |
| "loss": 0.6508, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.02486248303835884, |
| "grad_norm": 0.39098218083381653, |
| "learning_rate": 8.599331315536457e-06, |
| "loss": 0.8673, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.024891664356478983, |
| "grad_norm": 0.3419669568538666, |
| "learning_rate": 8.596129580866149e-06, |
| "loss": 0.8848, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.02492084567459912, |
| "grad_norm": 0.37696176767349243, |
| "learning_rate": 8.59292478852466e-06, |
| "loss": 0.7082, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.02495002699271926, |
| "grad_norm": 0.41791391372680664, |
| "learning_rate": 8.589716941236922e-06, |
| "loss": 1.3913, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.024979208310839402, |
| "grad_norm": 0.35013699531555176, |
| "learning_rate": 8.586506041730458e-06, |
| "loss": 1.1117, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.02500838962895954, |
| "grad_norm": 0.3478122353553772, |
| "learning_rate": 8.583292092735396e-06, |
| "loss": 0.3626, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.02503757094707968, |
| "grad_norm": 0.34681540727615356, |
| "learning_rate": 8.580075096984447e-06, |
| "loss": 0.7394, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.025066752265199818, |
| "grad_norm": 0.38682496547698975, |
| "learning_rate": 8.576855057212918e-06, |
| "loss": 0.6958, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.02509593358331996, |
| "grad_norm": 0.30751389265060425, |
| "learning_rate": 8.573631976158707e-06, |
| "loss": 0.3471, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.025125114901440097, |
| "grad_norm": 0.3397458791732788, |
| "learning_rate": 8.570405856562289e-06, |
| "loss": 0.6752, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.025154296219560238, |
| "grad_norm": 0.3534058630466461, |
| "learning_rate": 8.567176701166732e-06, |
| "loss": 0.5335, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.02518347753768038, |
| "grad_norm": 0.34201693534851074, |
| "learning_rate": 8.56394451271768e-06, |
| "loss": 0.639, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.025212658855800516, |
| "grad_norm": 0.346853107213974, |
| "learning_rate": 8.560709293963356e-06, |
| "loss": 1.7195, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.025241840173920657, |
| "grad_norm": 0.4329560101032257, |
| "learning_rate": 8.55747104765456e-06, |
| "loss": 1.0585, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.025271021492040795, |
| "grad_norm": 0.4640330970287323, |
| "learning_rate": 8.554229776544667e-06, |
| "loss": 0.6116, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.025300202810160936, |
| "grad_norm": 0.3633824586868286, |
| "learning_rate": 8.550985483389627e-06, |
| "loss": 0.5177, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.025329384128281073, |
| "grad_norm": 0.3476978540420532, |
| "learning_rate": 8.547738170947952e-06, |
| "loss": 0.4096, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.025358565446401214, |
| "grad_norm": 0.349484920501709, |
| "learning_rate": 8.544487841980726e-06, |
| "loss": 0.5414, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.02538774676452135, |
| "grad_norm": 0.33577412366867065, |
| "learning_rate": 8.541234499251599e-06, |
| "loss": 1.331, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.025416928082641493, |
| "grad_norm": 0.3705176115036011, |
| "learning_rate": 8.537978145526783e-06, |
| "loss": 0.6475, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.025446109400761634, |
| "grad_norm": 0.3451250493526459, |
| "learning_rate": 8.534718783575044e-06, |
| "loss": 0.5252, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.02547529071888177, |
| "grad_norm": 0.3250756561756134, |
| "learning_rate": 8.531456416167713e-06, |
| "loss": 0.5049, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.025504472037001912, |
| "grad_norm": 0.32594504952430725, |
| "learning_rate": 8.528191046078678e-06, |
| "loss": 1.8138, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.02553365335512205, |
| "grad_norm": 0.33167028427124023, |
| "learning_rate": 8.524922676084369e-06, |
| "loss": 0.5168, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.02556283467324219, |
| "grad_norm": 0.2993742823600769, |
| "learning_rate": 8.52165130896378e-06, |
| "loss": 1.0589, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.025592015991362328, |
| "grad_norm": 0.28559255599975586, |
| "learning_rate": 8.518376947498445e-06, |
| "loss": 0.3246, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.02562119730948247, |
| "grad_norm": 0.37067145109176636, |
| "learning_rate": 8.515099594472447e-06, |
| "loss": 1.1066, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.02565037862760261, |
| "grad_norm": 0.3459414839744568, |
| "learning_rate": 8.511819252672409e-06, |
| "loss": 1.1633, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.025679559945722748, |
| "grad_norm": 0.32875338196754456, |
| "learning_rate": 8.508535924887504e-06, |
| "loss": 0.7569, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.02570874126384289, |
| "grad_norm": 0.3915788531303406, |
| "learning_rate": 8.505249613909431e-06, |
| "loss": 1.091, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.025737922581963026, |
| "grad_norm": 0.3314580023288727, |
| "learning_rate": 8.501960322532438e-06, |
| "loss": 0.5122, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.025767103900083167, |
| "grad_norm": 0.34932106733322144, |
| "learning_rate": 8.4986680535533e-06, |
| "loss": 0.6582, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.025796285218203305, |
| "grad_norm": 0.3262951374053955, |
| "learning_rate": 8.495372809771324e-06, |
| "loss": 0.5165, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.025825466536323446, |
| "grad_norm": 0.3734574317932129, |
| "learning_rate": 8.492074593988351e-06, |
| "loss": 1.6527, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.025854647854443587, |
| "grad_norm": 0.4937126040458679, |
| "learning_rate": 8.488773409008741e-06, |
| "loss": 0.7144, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.025883829172563724, |
| "grad_norm": 0.8940378427505493, |
| "learning_rate": 8.48546925763939e-06, |
| "loss": 0.9933, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.025913010490683865, |
| "grad_norm": 0.378903329372406, |
| "learning_rate": 8.482162142689704e-06, |
| "loss": 1.5433, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.025942191808804003, |
| "grad_norm": 0.2883460521697998, |
| "learning_rate": 8.478852066971617e-06, |
| "loss": 0.369, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.025971373126924144, |
| "grad_norm": 0.38854387402534485, |
| "learning_rate": 8.475539033299578e-06, |
| "loss": 1.2095, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.02600055444504428, |
| "grad_norm": 0.34542709589004517, |
| "learning_rate": 8.472223044490549e-06, |
| "loss": 0.4971, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.026029735763164422, |
| "grad_norm": 0.3368820548057556, |
| "learning_rate": 8.46890410336401e-06, |
| "loss": 0.6433, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.026058917081284563, |
| "grad_norm": 0.37869739532470703, |
| "learning_rate": 8.465582212741947e-06, |
| "loss": 1.0214, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.0260880983994047, |
| "grad_norm": 0.3899117708206177, |
| "learning_rate": 8.46225737544885e-06, |
| "loss": 0.9141, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.026117279717524842, |
| "grad_norm": 0.3589392900466919, |
| "learning_rate": 8.458929594311727e-06, |
| "loss": 0.63, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.02614646103564498, |
| "grad_norm": 0.3222079277038574, |
| "learning_rate": 8.455598872160075e-06, |
| "loss": 0.9899, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.02617564235376512, |
| "grad_norm": 0.38838139176368713, |
| "learning_rate": 8.452265211825903e-06, |
| "loss": 0.6277, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.026204823671885258, |
| "grad_norm": 0.31074798107147217, |
| "learning_rate": 8.448928616143709e-06, |
| "loss": 0.4076, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.0262340049900054, |
| "grad_norm": 0.37609365582466125, |
| "learning_rate": 8.445589087950491e-06, |
| "loss": 1.7928, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.026263186308125536, |
| "grad_norm": 0.34537938237190247, |
| "learning_rate": 8.442246630085744e-06, |
| "loss": 0.4925, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.026292367626245677, |
| "grad_norm": 0.45988473296165466, |
| "learning_rate": 8.43890124539145e-06, |
| "loss": 1.151, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.02632154894436582, |
| "grad_norm": 0.6593732833862305, |
| "learning_rate": 8.435552936712074e-06, |
| "loss": 0.6752, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.026350730262485956, |
| "grad_norm": 0.3613491952419281, |
| "learning_rate": 8.43220170689458e-06, |
| "loss": 0.4724, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.026379911580606097, |
| "grad_norm": 0.32210037112236023, |
| "learning_rate": 8.428847558788408e-06, |
| "loss": 0.5491, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.026409092898726234, |
| "grad_norm": 0.2931801378726959, |
| "learning_rate": 8.425490495245477e-06, |
| "loss": 0.9981, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.026438274216846375, |
| "grad_norm": 0.3886311948299408, |
| "learning_rate": 8.42213051912019e-06, |
| "loss": 0.6799, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.026467455534966513, |
| "grad_norm": 0.302743136882782, |
| "learning_rate": 8.418767633269424e-06, |
| "loss": 0.3746, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.026496636853086654, |
| "grad_norm": 0.4175827205181122, |
| "learning_rate": 8.415401840552532e-06, |
| "loss": 1.0159, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.026525818171206795, |
| "grad_norm": 0.33958056569099426, |
| "learning_rate": 8.412033143831338e-06, |
| "loss": 1.0346, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.026554999489326932, |
| "grad_norm": 0.3762778341770172, |
| "learning_rate": 8.408661545970132e-06, |
| "loss": 0.6079, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.026584180807447073, |
| "grad_norm": 0.8803741931915283, |
| "learning_rate": 8.405287049835675e-06, |
| "loss": 1.0307, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.02661336212556721, |
| "grad_norm": 0.3808031976222992, |
| "learning_rate": 8.401909658297188e-06, |
| "loss": 0.5916, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.026642543443687352, |
| "grad_norm": 0.35952091217041016, |
| "learning_rate": 8.39852937422636e-06, |
| "loss": 0.9955, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.02667172476180749, |
| "grad_norm": 0.3484751284122467, |
| "learning_rate": 8.395146200497333e-06, |
| "loss": 0.5808, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.02670090607992763, |
| "grad_norm": 0.40232113003730774, |
| "learning_rate": 8.391760139986711e-06, |
| "loss": 0.5685, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.02673008739804777, |
| "grad_norm": 0.34646543860435486, |
| "learning_rate": 8.388371195573546e-06, |
| "loss": 0.6943, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.02675926871616791, |
| "grad_norm": 0.3797261416912079, |
| "learning_rate": 8.384979370139352e-06, |
| "loss": 0.976, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.02678845003428805, |
| "grad_norm": 0.3100775182247162, |
| "learning_rate": 8.381584666568086e-06, |
| "loss": 0.554, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.026817631352408187, |
| "grad_norm": 0.35303258895874023, |
| "learning_rate": 8.378187087746149e-06, |
| "loss": 0.6843, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.02684681267052833, |
| "grad_norm": 0.3995976448059082, |
| "learning_rate": 8.374786636562394e-06, |
| "loss": 0.551, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.026875993988648466, |
| "grad_norm": 0.4339585602283478, |
| "learning_rate": 8.371383315908115e-06, |
| "loss": 0.9765, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.026905175306768607, |
| "grad_norm": 0.3921199440956116, |
| "learning_rate": 8.367977128677041e-06, |
| "loss": 0.6011, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.026934356624888744, |
| "grad_norm": 0.35559672117233276, |
| "learning_rate": 8.364568077765343e-06, |
| "loss": 1.2612, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.026963537943008885, |
| "grad_norm": 0.37409472465515137, |
| "learning_rate": 8.361156166071624e-06, |
| "loss": 0.6968, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.026992719261129026, |
| "grad_norm": 0.36014920473098755, |
| "learning_rate": 8.35774139649692e-06, |
| "loss": 0.5237, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.027021900579249164, |
| "grad_norm": 0.4166014492511749, |
| "learning_rate": 8.354323771944703e-06, |
| "loss": 0.8852, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.027051081897369305, |
| "grad_norm": 0.5179449915885925, |
| "learning_rate": 8.350903295320863e-06, |
| "loss": 0.747, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.027080263215489443, |
| "grad_norm": 0.358181893825531, |
| "learning_rate": 8.347479969533719e-06, |
| "loss": 1.909, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.027109444533609584, |
| "grad_norm": 0.42436039447784424, |
| "learning_rate": 8.344053797494012e-06, |
| "loss": 0.9645, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.02713862585172972, |
| "grad_norm": 0.44533056020736694, |
| "learning_rate": 8.340624782114907e-06, |
| "loss": 1.0199, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.027167807169849862, |
| "grad_norm": 0.32696932554244995, |
| "learning_rate": 8.33719292631198e-06, |
| "loss": 0.6368, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.027196988487970003, |
| "grad_norm": 0.3207986354827881, |
| "learning_rate": 8.33375823300323e-06, |
| "loss": 0.4764, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.02722616980609014, |
| "grad_norm": 0.42434313893318176, |
| "learning_rate": 8.33032070510906e-06, |
| "loss": 0.8128, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.02725535112421028, |
| "grad_norm": 0.30024072527885437, |
| "learning_rate": 8.32688034555229e-06, |
| "loss": 1.0602, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.02728453244233042, |
| "grad_norm": 0.31329378485679626, |
| "learning_rate": 8.323437157258143e-06, |
| "loss": 0.4821, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.02731371376045056, |
| "grad_norm": 0.3860412836074829, |
| "learning_rate": 8.319991143154252e-06, |
| "loss": 0.4597, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.027342895078570698, |
| "grad_norm": 0.3964857757091522, |
| "learning_rate": 8.316542306170646e-06, |
| "loss": 0.939, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.02737207639669084, |
| "grad_norm": 0.3727983832359314, |
| "learning_rate": 8.31309064923976e-06, |
| "loss": 1.1637, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.02740125771481098, |
| "grad_norm": 0.35207825899124146, |
| "learning_rate": 8.309636175296427e-06, |
| "loss": 0.5325, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.027430439032931117, |
| "grad_norm": 0.45173379778862, |
| "learning_rate": 8.30617888727787e-06, |
| "loss": 1.0715, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.027459620351051258, |
| "grad_norm": 0.34614241123199463, |
| "learning_rate": 8.302718788123708e-06, |
| "loss": 0.5424, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.027488801669171396, |
| "grad_norm": 0.33969104290008545, |
| "learning_rate": 8.299255880775952e-06, |
| "loss": 0.6815, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.027517982987291537, |
| "grad_norm": 0.3439977467060089, |
| "learning_rate": 8.295790168178997e-06, |
| "loss": 0.5332, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.027547164305411674, |
| "grad_norm": 0.38102537393569946, |
| "learning_rate": 8.292321653279627e-06, |
| "loss": 0.6612, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.027576345623531815, |
| "grad_norm": 0.3824597895145416, |
| "learning_rate": 8.288850339027003e-06, |
| "loss": 0.5719, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.027605526941651956, |
| "grad_norm": 0.34761905670166016, |
| "learning_rate": 8.285376228372673e-06, |
| "loss": 0.4407, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.027634708259772094, |
| "grad_norm": 0.5802657008171082, |
| "learning_rate": 8.281899324270557e-06, |
| "loss": 1.2267, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.027663889577892235, |
| "grad_norm": 0.325278639793396, |
| "learning_rate": 8.278419629676955e-06, |
| "loss": 0.5031, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.027693070896012372, |
| "grad_norm": 0.36002323031425476, |
| "learning_rate": 8.274937147550534e-06, |
| "loss": 0.658, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.027722252214132513, |
| "grad_norm": 0.33307257294654846, |
| "learning_rate": 8.27145188085234e-06, |
| "loss": 0.495, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.02775143353225265, |
| "grad_norm": 0.36557403206825256, |
| "learning_rate": 8.267963832545775e-06, |
| "loss": 1.0257, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.02778061485037279, |
| "grad_norm": 0.3987572193145752, |
| "learning_rate": 8.264473005596616e-06, |
| "loss": 1.2238, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.02780979616849293, |
| "grad_norm": 0.41120660305023193, |
| "learning_rate": 8.260979402972998e-06, |
| "loss": 1.7157, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.02783897748661307, |
| "grad_norm": 0.5519174933433533, |
| "learning_rate": 8.257483027645418e-06, |
| "loss": 1.5844, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.02786815880473321, |
| "grad_norm": 0.3390037715435028, |
| "learning_rate": 8.253983882586726e-06, |
| "loss": 1.1528, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.02789734012285335, |
| "grad_norm": 0.29556405544281006, |
| "learning_rate": 8.250481970772133e-06, |
| "loss": 1.0514, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.02792652144097349, |
| "grad_norm": 0.43600043654441833, |
| "learning_rate": 8.246977295179201e-06, |
| "loss": 1.0094, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.027955702759093627, |
| "grad_norm": 0.34551119804382324, |
| "learning_rate": 8.243469858787837e-06, |
| "loss": 0.6437, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.027984884077213768, |
| "grad_norm": 0.42130881547927856, |
| "learning_rate": 8.239959664580304e-06, |
| "loss": 0.9177, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.028014065395333906, |
| "grad_norm": 0.44574490189552307, |
| "learning_rate": 8.2364467155412e-06, |
| "loss": 1.1658, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.028043246713454047, |
| "grad_norm": 0.4009976387023926, |
| "learning_rate": 8.232931014657475e-06, |
| "loss": 1.1539, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.028072428031574188, |
| "grad_norm": 0.3993317186832428, |
| "learning_rate": 8.229412564918411e-06, |
| "loss": 0.5592, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.028101609349694325, |
| "grad_norm": 0.5721306204795837, |
| "learning_rate": 8.225891369315632e-06, |
| "loss": 0.9007, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.028130790667814466, |
| "grad_norm": 0.3204282224178314, |
| "learning_rate": 8.222367430843096e-06, |
| "loss": 0.4966, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.028159971985934604, |
| "grad_norm": 0.3686912953853607, |
| "learning_rate": 8.218840752497092e-06, |
| "loss": 0.5918, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.028189153304054745, |
| "grad_norm": 0.3475451171398163, |
| "learning_rate": 8.21531133727624e-06, |
| "loss": 0.4296, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.028218334622174882, |
| "grad_norm": 0.27553805708885193, |
| "learning_rate": 8.211779188181484e-06, |
| "loss": 0.9739, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.028247515940295023, |
| "grad_norm": 0.34486255049705505, |
| "learning_rate": 8.208244308216095e-06, |
| "loss": 1.0188, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.028276697258415164, |
| "grad_norm": 0.33501988649368286, |
| "learning_rate": 8.204706700385667e-06, |
| "loss": 0.5771, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.0283058785765353, |
| "grad_norm": 0.3335632085800171, |
| "learning_rate": 8.201166367698111e-06, |
| "loss": 0.4733, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.028335059894655443, |
| "grad_norm": 0.36643606424331665, |
| "learning_rate": 8.197623313163656e-06, |
| "loss": 1.1803, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.02836424121277558, |
| "grad_norm": 0.4409978985786438, |
| "learning_rate": 8.194077539794845e-06, |
| "loss": 1.3449, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.02839342253089572, |
| "grad_norm": 0.4731258451938629, |
| "learning_rate": 8.190529050606536e-06, |
| "loss": 0.5925, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.02842260384901586, |
| "grad_norm": 0.37104079127311707, |
| "learning_rate": 8.186977848615888e-06, |
| "loss": 0.6354, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.028451785167136, |
| "grad_norm": 0.3235186040401459, |
| "learning_rate": 8.183423936842373e-06, |
| "loss": 0.5021, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.02848096648525614, |
| "grad_norm": 0.3571196496486664, |
| "learning_rate": 8.17986731830777e-06, |
| "loss": 0.6292, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.02851014780337628, |
| "grad_norm": 0.34218212962150574, |
| "learning_rate": 8.17630799603615e-06, |
| "loss": 0.6586, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.02853932912149642, |
| "grad_norm": 0.3450371026992798, |
| "learning_rate": 8.172745973053893e-06, |
| "loss": 0.4681, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.028568510439616557, |
| "grad_norm": 0.4701233208179474, |
| "learning_rate": 8.169181252389665e-06, |
| "loss": 1.0334, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.028597691757736698, |
| "grad_norm": 0.36280113458633423, |
| "learning_rate": 8.165613837074436e-06, |
| "loss": 0.6493, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.028626873075856835, |
| "grad_norm": 0.36414480209350586, |
| "learning_rate": 8.162043730141459e-06, |
| "loss": 0.6326, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.028656054393976976, |
| "grad_norm": 0.45983657240867615, |
| "learning_rate": 8.15847093462628e-06, |
| "loss": 0.8886, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.028685235712097114, |
| "grad_norm": 0.4376150667667389, |
| "learning_rate": 8.154895453566731e-06, |
| "loss": 2.0379, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.028714417030217255, |
| "grad_norm": 0.3629320561885834, |
| "learning_rate": 8.151317290002927e-06, |
| "loss": 0.5055, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.028743598348337396, |
| "grad_norm": 0.3869571387767792, |
| "learning_rate": 8.147736446977262e-06, |
| "loss": 0.4559, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.028772779666457533, |
| "grad_norm": 0.39907169342041016, |
| "learning_rate": 8.144152927534407e-06, |
| "loss": 1.1915, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.028801960984577674, |
| "grad_norm": 0.37691083550453186, |
| "learning_rate": 8.140566734721317e-06, |
| "loss": 0.5816, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.028831142302697812, |
| "grad_norm": 0.3822781443595886, |
| "learning_rate": 8.136977871587208e-06, |
| "loss": 0.5801, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.028860323620817953, |
| "grad_norm": 0.657067596912384, |
| "learning_rate": 8.133386341183578e-06, |
| "loss": 1.3017, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.02888950493893809, |
| "grad_norm": 0.39143985509872437, |
| "learning_rate": 8.129792146564184e-06, |
| "loss": 1.2194, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.02891868625705823, |
| "grad_norm": 0.3870909512042999, |
| "learning_rate": 8.126195290785054e-06, |
| "loss": 1.1121, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.028947867575178372, |
| "grad_norm": 0.368807315826416, |
| "learning_rate": 8.122595776904476e-06, |
| "loss": 1.3513, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.02897704889329851, |
| "grad_norm": 0.3934575021266937, |
| "learning_rate": 8.118993607983e-06, |
| "loss": 1.5117, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.02900623021141865, |
| "grad_norm": 0.3632809519767761, |
| "learning_rate": 8.115388787083429e-06, |
| "loss": 1.0243, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.02903541152953879, |
| "grad_norm": 0.33420994877815247, |
| "learning_rate": 8.111781317270828e-06, |
| "loss": 0.4982, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.02906459284765893, |
| "grad_norm": 0.33691009879112244, |
| "learning_rate": 8.108171201612507e-06, |
| "loss": 0.514, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.029093774165779067, |
| "grad_norm": 0.418117880821228, |
| "learning_rate": 8.10455844317803e-06, |
| "loss": 0.7872, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.029122955483899208, |
| "grad_norm": 0.3103129267692566, |
| "learning_rate": 8.100943045039208e-06, |
| "loss": 0.5167, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.02915213680201935, |
| "grad_norm": 0.5365143418312073, |
| "learning_rate": 8.097325010270098e-06, |
| "loss": 0.551, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.029181318120139486, |
| "grad_norm": 0.3610197603702545, |
| "learning_rate": 8.093704341946994e-06, |
| "loss": 1.143, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.029210499438259627, |
| "grad_norm": 0.3408326506614685, |
| "learning_rate": 8.090081043148428e-06, |
| "loss": 0.4527, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.029239680756379765, |
| "grad_norm": 0.4250684678554535, |
| "learning_rate": 8.086455116955177e-06, |
| "loss": 0.8597, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.029268862074499906, |
| "grad_norm": 0.3899366855621338, |
| "learning_rate": 8.082826566450247e-06, |
| "loss": 0.8208, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.029298043392620043, |
| "grad_norm": 0.3285711407661438, |
| "learning_rate": 8.079195394718873e-06, |
| "loss": 0.5483, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.029327224710740184, |
| "grad_norm": 0.32631656527519226, |
| "learning_rate": 8.075561604848524e-06, |
| "loss": 0.5033, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.029356406028860322, |
| "grad_norm": 0.3740943968296051, |
| "learning_rate": 8.071925199928891e-06, |
| "loss": 0.965, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.029385587346980463, |
| "grad_norm": 0.4460504651069641, |
| "learning_rate": 8.068286183051892e-06, |
| "loss": 0.7444, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.029414768665100604, |
| "grad_norm": 0.44715797901153564, |
| "learning_rate": 8.064644557311661e-06, |
| "loss": 1.2786, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.02944394998322074, |
| "grad_norm": 0.44176000356674194, |
| "learning_rate": 8.061000325804555e-06, |
| "loss": 0.9424, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.029473131301340882, |
| "grad_norm": 0.3088590204715729, |
| "learning_rate": 8.057353491629146e-06, |
| "loss": 0.4233, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.02950231261946102, |
| "grad_norm": 0.3727954924106598, |
| "learning_rate": 8.053704057886213e-06, |
| "loss": 0.5397, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.02953149393758116, |
| "grad_norm": 0.37700340151786804, |
| "learning_rate": 8.050052027678753e-06, |
| "loss": 0.5766, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.0295606752557013, |
| "grad_norm": 0.39949533343315125, |
| "learning_rate": 8.046397404111967e-06, |
| "loss": 1.6508, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.02958985657382144, |
| "grad_norm": 0.37667009234428406, |
| "learning_rate": 8.042740190293264e-06, |
| "loss": 0.6718, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.02961903789194158, |
| "grad_norm": 0.3952672779560089, |
| "learning_rate": 8.03908038933225e-06, |
| "loss": 0.5718, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.029648219210061718, |
| "grad_norm": 0.4118516147136688, |
| "learning_rate": 8.035418004340734e-06, |
| "loss": 0.9345, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.02967740052818186, |
| "grad_norm": 0.48455119132995605, |
| "learning_rate": 8.031753038432725e-06, |
| "loss": 1.035, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.029706581846301996, |
| "grad_norm": 0.35907724499702454, |
| "learning_rate": 8.02808549472442e-06, |
| "loss": 1.0484, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.029735763164422137, |
| "grad_norm": 0.43426626920700073, |
| "learning_rate": 8.024415376334214e-06, |
| "loss": 1.5179, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.029764944482542275, |
| "grad_norm": 0.36434200406074524, |
| "learning_rate": 8.020742686382687e-06, |
| "loss": 0.6114, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.029794125800662416, |
| "grad_norm": 0.33498191833496094, |
| "learning_rate": 8.017067427992607e-06, |
| "loss": 0.4032, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.029823307118782557, |
| "grad_norm": 0.2691289782524109, |
| "learning_rate": 8.013389604288924e-06, |
| "loss": 0.3288, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.029852488436902695, |
| "grad_norm": 0.30916455388069153, |
| "learning_rate": 8.009709218398775e-06, |
| "loss": 1.0012, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.029881669755022836, |
| "grad_norm": 0.31660401821136475, |
| "learning_rate": 8.006026273451469e-06, |
| "loss": 1.1576, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.029910851073142973, |
| "grad_norm": 0.39816707372665405, |
| "learning_rate": 8.002340772578494e-06, |
| "loss": 0.7451, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.029940032391263114, |
| "grad_norm": 0.3330983817577362, |
| "learning_rate": 7.99865271891351e-06, |
| "loss": 0.5398, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.02996921370938325, |
| "grad_norm": 0.5938121676445007, |
| "learning_rate": 7.99496211559235e-06, |
| "loss": 0.8163, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.029998395027503393, |
| "grad_norm": 0.3240002989768982, |
| "learning_rate": 7.991268965753012e-06, |
| "loss": 0.9836, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.030027576345623534, |
| "grad_norm": 0.34322166442871094, |
| "learning_rate": 7.98757327253566e-06, |
| "loss": 0.8953, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.03005675766374367, |
| "grad_norm": 0.4308999478816986, |
| "learning_rate": 7.98387503908262e-06, |
| "loss": 0.5594, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.030085938981863812, |
| "grad_norm": 0.3498215675354004, |
| "learning_rate": 7.980174268538383e-06, |
| "loss": 0.5586, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.03011512029998395, |
| "grad_norm": 0.2966274917125702, |
| "learning_rate": 7.976470964049587e-06, |
| "loss": 0.4391, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.03014430161810409, |
| "grad_norm": 0.37053200602531433, |
| "learning_rate": 7.972765128765034e-06, |
| "loss": 0.4673, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.030173482936224228, |
| "grad_norm": 0.3890681564807892, |
| "learning_rate": 7.969056765835674e-06, |
| "loss": 1.0385, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.03020266425434437, |
| "grad_norm": 0.49088913202285767, |
| "learning_rate": 7.965345878414606e-06, |
| "loss": 1.0126, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.030231845572464507, |
| "grad_norm": 0.36411839723587036, |
| "learning_rate": 7.961632469657077e-06, |
| "loss": 0.6179, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.030261026890584648, |
| "grad_norm": 0.4549872577190399, |
| "learning_rate": 7.957916542720477e-06, |
| "loss": 0.7976, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.03029020820870479, |
| "grad_norm": 0.33350419998168945, |
| "learning_rate": 7.954198100764332e-06, |
| "loss": 0.586, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.030319389526824926, |
| "grad_norm": 0.3661065399646759, |
| "learning_rate": 7.95047714695032e-06, |
| "loss": 0.6217, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.030348570844945067, |
| "grad_norm": 0.3837584853172302, |
| "learning_rate": 7.946753684442238e-06, |
| "loss": 0.5969, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.030377752163065205, |
| "grad_norm": 0.5177484154701233, |
| "learning_rate": 7.94302771640603e-06, |
| "loss": 0.6189, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.030406933481185346, |
| "grad_norm": 0.33467498421669006, |
| "learning_rate": 7.939299246009763e-06, |
| "loss": 1.1826, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.030436114799305483, |
| "grad_norm": 0.3024804890155792, |
| "learning_rate": 7.93556827642363e-06, |
| "loss": 0.3418, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.030465296117425624, |
| "grad_norm": 0.37285321950912476, |
| "learning_rate": 7.931834810819957e-06, |
| "loss": 0.5064, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.030494477435545765, |
| "grad_norm": 0.29143035411834717, |
| "learning_rate": 7.928098852373186e-06, |
| "loss": 0.3542, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.030523658753665903, |
| "grad_norm": 0.34744787216186523, |
| "learning_rate": 7.924360404259876e-06, |
| "loss": 0.4747, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.030552840071786044, |
| "grad_norm": 0.3071443438529968, |
| "learning_rate": 7.920619469658712e-06, |
| "loss": 1.6283, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.03058202138990618, |
| "grad_norm": 0.38159486651420593, |
| "learning_rate": 7.916876051750486e-06, |
| "loss": 0.5442, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.030611202708026322, |
| "grad_norm": 0.4748477637767792, |
| "learning_rate": 7.913130153718105e-06, |
| "loss": 0.8678, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.03064038402614646, |
| "grad_norm": 0.44142308831214905, |
| "learning_rate": 7.909381778746583e-06, |
| "loss": 0.5497, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.0306695653442666, |
| "grad_norm": 0.4423367381095886, |
| "learning_rate": 7.905630930023038e-06, |
| "loss": 1.1476, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.03069874666238674, |
| "grad_norm": 0.5354142785072327, |
| "learning_rate": 7.9018776107367e-06, |
| "loss": 0.8074, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.03072792798050688, |
| "grad_norm": 0.38381558656692505, |
| "learning_rate": 7.898121824078884e-06, |
| "loss": 0.6096, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.03075710929862702, |
| "grad_norm": 0.43491822481155396, |
| "learning_rate": 7.89436357324302e-06, |
| "loss": 1.2682, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.030786290616747158, |
| "grad_norm": 0.44252243638038635, |
| "learning_rate": 7.890602861424624e-06, |
| "loss": 0.7504, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.0308154719348673, |
| "grad_norm": 0.37600377202033997, |
| "learning_rate": 7.886839691821302e-06, |
| "loss": 0.7108, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.030844653252987436, |
| "grad_norm": 0.3634088635444641, |
| "learning_rate": 7.883074067632758e-06, |
| "loss": 0.5251, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.030873834571107577, |
| "grad_norm": 0.4122987389564514, |
| "learning_rate": 7.879305992060781e-06, |
| "loss": 0.8763, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.030903015889227715, |
| "grad_norm": 0.39387229084968567, |
| "learning_rate": 7.875535468309236e-06, |
| "loss": 0.5157, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.030932197207347856, |
| "grad_norm": 0.4322737157344818, |
| "learning_rate": 7.87176249958408e-06, |
| "loss": 0.7394, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.030961378525467997, |
| "grad_norm": 0.36371564865112305, |
| "learning_rate": 7.867987089093346e-06, |
| "loss": 0.5054, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.030990559843588134, |
| "grad_norm": 0.3461953401565552, |
| "learning_rate": 7.864209240047136e-06, |
| "loss": 0.8237, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.031019741161708275, |
| "grad_norm": 0.2892926335334778, |
| "learning_rate": 7.860428955657637e-06, |
| "loss": 0.4529, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.031048922479828413, |
| "grad_norm": 0.34508439898490906, |
| "learning_rate": 7.856646239139096e-06, |
| "loss": 0.5666, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.031078103797948554, |
| "grad_norm": 0.4654565751552582, |
| "learning_rate": 7.852861093707838e-06, |
| "loss": 0.7985, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.03110728511606869, |
| "grad_norm": 0.3912844955921173, |
| "learning_rate": 7.849073522582247e-06, |
| "loss": 1.2279, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.031136466434188832, |
| "grad_norm": 0.4318024814128876, |
| "learning_rate": 7.845283528982768e-06, |
| "loss": 0.5041, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.031165647752308973, |
| "grad_norm": 0.3556537628173828, |
| "learning_rate": 7.84149111613191e-06, |
| "loss": 1.0727, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.03119482907042911, |
| "grad_norm": 0.3856569230556488, |
| "learning_rate": 7.837696287254238e-06, |
| "loss": 0.7528, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.031224010388549252, |
| "grad_norm": 0.4211376905441284, |
| "learning_rate": 7.83389904557637e-06, |
| "loss": 0.4889, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.03125319170666939, |
| "grad_norm": 0.4091140627861023, |
| "learning_rate": 7.830099394326975e-06, |
| "loss": 0.7124, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.03128237302478953, |
| "grad_norm": 0.3757210671901703, |
| "learning_rate": 7.826297336736774e-06, |
| "loss": 0.5778, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.03131155434290967, |
| "grad_norm": 0.4309159517288208, |
| "learning_rate": 7.822492876038532e-06, |
| "loss": 0.6366, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.03134073566102981, |
| "grad_norm": 0.4585062563419342, |
| "learning_rate": 7.818686015467057e-06, |
| "loss": 0.8283, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.031369916979149946, |
| "grad_norm": 0.3809289038181305, |
| "learning_rate": 7.814876758259198e-06, |
| "loss": 0.6711, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.03139909829727009, |
| "grad_norm": 0.5379712581634521, |
| "learning_rate": 7.811065107653839e-06, |
| "loss": 0.7728, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.03142827961539023, |
| "grad_norm": 0.3441392779350281, |
| "learning_rate": 7.807251066891906e-06, |
| "loss": 0.5319, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.031457460933510366, |
| "grad_norm": 0.463530570268631, |
| "learning_rate": 7.803434639216348e-06, |
| "loss": 0.914, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.0314866422516305, |
| "grad_norm": 0.3516235649585724, |
| "learning_rate": 7.79961582787215e-06, |
| "loss": 0.584, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.03151582356975065, |
| "grad_norm": 0.3726338744163513, |
| "learning_rate": 7.795794636106328e-06, |
| "loss": 0.4631, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.031545004887870785, |
| "grad_norm": 0.42811986804008484, |
| "learning_rate": 7.791971067167908e-06, |
| "loss": 0.6871, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.03157418620599092, |
| "grad_norm": 0.4483218789100647, |
| "learning_rate": 7.78814512430795e-06, |
| "loss": 1.1861, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.03160336752411107, |
| "grad_norm": 0.34980836510658264, |
| "learning_rate": 7.784316810779527e-06, |
| "loss": 0.5153, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.031632548842231205, |
| "grad_norm": 0.38568314909935, |
| "learning_rate": 7.780486129837728e-06, |
| "loss": 0.4124, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.03166173016035134, |
| "grad_norm": 0.36355653405189514, |
| "learning_rate": 7.776653084739656e-06, |
| "loss": 0.7252, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.03169091147847148, |
| "grad_norm": 0.364662766456604, |
| "learning_rate": 7.772817678744424e-06, |
| "loss": 0.566, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.031720092796591624, |
| "grad_norm": 0.2872055768966675, |
| "learning_rate": 7.768979915113151e-06, |
| "loss": 0.406, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.03174927411471176, |
| "grad_norm": 0.34369534254074097, |
| "learning_rate": 7.765139797108962e-06, |
| "loss": 0.5794, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.0317784554328319, |
| "grad_norm": 0.30418676137924194, |
| "learning_rate": 7.761297327996983e-06, |
| "loss": 0.4197, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.031807636750952044, |
| "grad_norm": 0.3705112040042877, |
| "learning_rate": 7.757452511044344e-06, |
| "loss": 0.5224, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.03183681806907218, |
| "grad_norm": 0.43729299306869507, |
| "learning_rate": 7.75360534952016e-06, |
| "loss": 1.0413, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.03186599938719232, |
| "grad_norm": 0.4053674340248108, |
| "learning_rate": 7.74975584669555e-06, |
| "loss": 0.3907, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.031895180705312456, |
| "grad_norm": 0.3476770520210266, |
| "learning_rate": 7.74590400584362e-06, |
| "loss": 0.5218, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.0319243620234326, |
| "grad_norm": 0.34308961033821106, |
| "learning_rate": 7.742049830239466e-06, |
| "loss": 0.522, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.03195354334155274, |
| "grad_norm": 0.3416766822338104, |
| "learning_rate": 7.738193323160163e-06, |
| "loss": 0.664, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.031982724659672876, |
| "grad_norm": 0.3378092646598816, |
| "learning_rate": 7.734334487884778e-06, |
| "loss": 0.5783, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.03201190597779301, |
| "grad_norm": 0.44200628995895386, |
| "learning_rate": 7.730473327694348e-06, |
| "loss": 0.7794, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.03204108729591316, |
| "grad_norm": 0.3616589605808258, |
| "learning_rate": 7.726609845871892e-06, |
| "loss": 0.6055, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.032070268614033295, |
| "grad_norm": 0.3770327568054199, |
| "learning_rate": 7.722744045702401e-06, |
| "loss": 0.5934, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.03209944993215343, |
| "grad_norm": 0.48067131638526917, |
| "learning_rate": 7.71887593047284e-06, |
| "loss": 1.0011, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.03212863125027358, |
| "grad_norm": 0.44122299551963806, |
| "learning_rate": 7.715005503472139e-06, |
| "loss": 0.9892, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.032157812568393715, |
| "grad_norm": 0.350125789642334, |
| "learning_rate": 7.711132767991198e-06, |
| "loss": 0.6958, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.03218699388651385, |
| "grad_norm": 0.39240893721580505, |
| "learning_rate": 7.707257727322876e-06, |
| "loss": 1.79, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.03221617520463399, |
| "grad_norm": 0.3865666389465332, |
| "learning_rate": 7.70338038476199e-06, |
| "loss": 0.7273, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.032245356522754134, |
| "grad_norm": 0.37556910514831543, |
| "learning_rate": 7.699500743605323e-06, |
| "loss": 0.5578, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.03227453784087427, |
| "grad_norm": 0.3241673409938812, |
| "learning_rate": 7.695618807151604e-06, |
| "loss": 0.5985, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.03230371915899441, |
| "grad_norm": 0.6130295395851135, |
| "learning_rate": 7.691734578701518e-06, |
| "loss": 0.4949, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.032332900477114554, |
| "grad_norm": 0.4039415717124939, |
| "learning_rate": 7.68784806155769e-06, |
| "loss": 0.6842, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.03236208179523469, |
| "grad_norm": 0.3703124523162842, |
| "learning_rate": 7.683959259024707e-06, |
| "loss": 0.5181, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.03239126311335483, |
| "grad_norm": 0.2917269468307495, |
| "learning_rate": 7.680068174409088e-06, |
| "loss": 0.3816, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.032420444431474967, |
| "grad_norm": 0.34110063314437866, |
| "learning_rate": 7.67617481101929e-06, |
| "loss": 1.232, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.03244962574959511, |
| "grad_norm": 0.2703442871570587, |
| "learning_rate": 7.67227917216572e-06, |
| "loss": 0.9118, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.03247880706771525, |
| "grad_norm": 0.4809757173061371, |
| "learning_rate": 7.668381261160707e-06, |
| "loss": 0.9091, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.032507988385835386, |
| "grad_norm": 0.39859843254089355, |
| "learning_rate": 7.664481081318518e-06, |
| "loss": 0.6396, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.03253716970395553, |
| "grad_norm": 0.3732917904853821, |
| "learning_rate": 7.66057863595535e-06, |
| "loss": 0.6296, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.03256635102207567, |
| "grad_norm": 0.3549406826496124, |
| "learning_rate": 7.65667392838932e-06, |
| "loss": 0.5951, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.032595532340195806, |
| "grad_norm": 0.4408586919307709, |
| "learning_rate": 7.652766961940479e-06, |
| "loss": 0.9631, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.03262471365831594, |
| "grad_norm": 0.3849949836730957, |
| "learning_rate": 7.648857739930789e-06, |
| "loss": 0.493, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.03265389497643609, |
| "grad_norm": 0.6702129244804382, |
| "learning_rate": 7.644946265684131e-06, |
| "loss": 0.6255, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.032683076294556225, |
| "grad_norm": 0.34080299735069275, |
| "learning_rate": 7.64103254252631e-06, |
| "loss": 0.5113, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.03271225761267636, |
| "grad_norm": 0.36026284098625183, |
| "learning_rate": 7.63711657378503e-06, |
| "loss": 0.5794, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.03274143893079651, |
| "grad_norm": 0.360908567905426, |
| "learning_rate": 7.633198362789916e-06, |
| "loss": 0.705, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.032770620248916645, |
| "grad_norm": 0.36677560210227966, |
| "learning_rate": 7.6292779128724925e-06, |
| "loss": 0.5346, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.03279980156703678, |
| "grad_norm": 0.4496927559375763, |
| "learning_rate": 7.625355227366188e-06, |
| "loss": 0.9781, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.03282898288515692, |
| "grad_norm": 0.393372505903244, |
| "learning_rate": 7.621430309606334e-06, |
| "loss": 0.4752, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.032858164203277064, |
| "grad_norm": 0.5015695095062256, |
| "learning_rate": 7.617503162930165e-06, |
| "loss": 0.6052, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.0328873455213972, |
| "grad_norm": 0.33584579825401306, |
| "learning_rate": 7.613573790676796e-06, |
| "loss": 0.5276, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.03291652683951734, |
| "grad_norm": 0.3761533796787262, |
| "learning_rate": 7.60964219618725e-06, |
| "loss": 1.2579, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.032945708157637484, |
| "grad_norm": 0.3163454234600067, |
| "learning_rate": 7.6057083828044306e-06, |
| "loss": 0.5218, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.03297488947575762, |
| "grad_norm": 0.7250996828079224, |
| "learning_rate": 7.6017723538731315e-06, |
| "loss": 0.7036, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.03300407079387776, |
| "grad_norm": 0.3986012041568756, |
| "learning_rate": 7.597834112740028e-06, |
| "loss": 1.4118, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.033033252111997896, |
| "grad_norm": 0.3263603448867798, |
| "learning_rate": 7.593893662753679e-06, |
| "loss": 0.5118, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.03306243343011804, |
| "grad_norm": 0.3380596339702606, |
| "learning_rate": 7.589951007264519e-06, |
| "loss": 1.1527, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.03309161474823818, |
| "grad_norm": 0.38954704999923706, |
| "learning_rate": 7.586006149624858e-06, |
| "loss": 1.4932, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.033120796066358316, |
| "grad_norm": 0.6492702960968018, |
| "learning_rate": 7.582059093188883e-06, |
| "loss": 0.6977, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.03314997738447846, |
| "grad_norm": 0.37414640188217163, |
| "learning_rate": 7.578109841312642e-06, |
| "loss": 0.477, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.0331791587025986, |
| "grad_norm": 0.4186456501483917, |
| "learning_rate": 7.574158397354056e-06, |
| "loss": 0.9542, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.033208340020718735, |
| "grad_norm": 0.36833125352859497, |
| "learning_rate": 7.570204764672911e-06, |
| "loss": 0.6121, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.03323752133883887, |
| "grad_norm": 0.3218482434749603, |
| "learning_rate": 7.566248946630847e-06, |
| "loss": 0.4056, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.03326670265695902, |
| "grad_norm": 0.4563610255718231, |
| "learning_rate": 7.562290946591371e-06, |
| "loss": 1.0987, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.033295883975079155, |
| "grad_norm": 0.3583057224750519, |
| "learning_rate": 7.558330767919837e-06, |
| "loss": 0.6237, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.03332506529319929, |
| "grad_norm": 0.3971406817436218, |
| "learning_rate": 7.5543684139834575e-06, |
| "loss": 1.9152, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.03335424661131944, |
| "grad_norm": 0.3493388593196869, |
| "learning_rate": 7.55040388815129e-06, |
| "loss": 0.484, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.033383427929439574, |
| "grad_norm": 0.3457147777080536, |
| "learning_rate": 7.546437193794244e-06, |
| "loss": 1.1294, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.03341260924755971, |
| "grad_norm": 0.322686105966568, |
| "learning_rate": 7.542468334285067e-06, |
| "loss": 1.0843, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.03344179056567985, |
| "grad_norm": 0.4524141848087311, |
| "learning_rate": 7.538497312998353e-06, |
| "loss": 1.0055, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.033470971883799994, |
| "grad_norm": 0.35150715708732605, |
| "learning_rate": 7.534524133310528e-06, |
| "loss": 0.6494, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.03350015320192013, |
| "grad_norm": 0.3671739101409912, |
| "learning_rate": 7.530548798599859e-06, |
| "loss": 0.921, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.03352933452004027, |
| "grad_norm": 0.3715021312236786, |
| "learning_rate": 7.526571312246444e-06, |
| "loss": 0.859, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.03355851583816041, |
| "grad_norm": 0.44701990485191345, |
| "learning_rate": 7.522591677632205e-06, |
| "loss": 0.4936, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.03358769715628055, |
| "grad_norm": 0.41474953293800354, |
| "learning_rate": 7.5186098981408984e-06, |
| "loss": 0.9482, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.03361687847440069, |
| "grad_norm": 0.4054737389087677, |
| "learning_rate": 7.5146259771581e-06, |
| "loss": 0.6202, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.033646059792520826, |
| "grad_norm": 0.4136887192726135, |
| "learning_rate": 7.510639918071207e-06, |
| "loss": 0.9124, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.03367524111064097, |
| "grad_norm": 0.46576860547065735, |
| "learning_rate": 7.5066517242694355e-06, |
| "loss": 1.817, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.03370442242876111, |
| "grad_norm": 0.33840325474739075, |
| "learning_rate": 7.502661399143816e-06, |
| "loss": 1.166, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.033733603746881245, |
| "grad_norm": 0.4206502139568329, |
| "learning_rate": 7.49866894608719e-06, |
| "loss": 0.5738, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.03376278506500138, |
| "grad_norm": 0.3557110130786896, |
| "learning_rate": 7.494674368494211e-06, |
| "loss": 0.5238, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.03379196638312153, |
| "grad_norm": 0.3478293716907501, |
| "learning_rate": 7.4906776697613356e-06, |
| "loss": 0.4904, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.033821147701241665, |
| "grad_norm": 0.39063262939453125, |
| "learning_rate": 7.486678853286829e-06, |
| "loss": 0.6281, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.0338503290193618, |
| "grad_norm": 0.47280237078666687, |
| "learning_rate": 7.482677922470751e-06, |
| "loss": 0.8061, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.03387951033748195, |
| "grad_norm": 0.35882726311683655, |
| "learning_rate": 7.478674880714962e-06, |
| "loss": 0.5765, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.033908691655602084, |
| "grad_norm": 0.3286476731300354, |
| "learning_rate": 7.474669731423121e-06, |
| "loss": 0.4861, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.03393787297372222, |
| "grad_norm": 0.30678001046180725, |
| "learning_rate": 7.470662478000671e-06, |
| "loss": 0.5941, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.03396705429184236, |
| "grad_norm": 0.3462623357772827, |
| "learning_rate": 7.466653123854849e-06, |
| "loss": 1.0751, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.033996235609962504, |
| "grad_norm": 0.38402771949768066, |
| "learning_rate": 7.462641672394679e-06, |
| "loss": 0.6588, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.03402541692808264, |
| "grad_norm": 0.41210705041885376, |
| "learning_rate": 7.458628127030967e-06, |
| "loss": 0.8555, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.03405459824620278, |
| "grad_norm": 0.3831291198730469, |
| "learning_rate": 7.454612491176297e-06, |
| "loss": 0.4943, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.03408377956432292, |
| "grad_norm": 0.3251889944076538, |
| "learning_rate": 7.450594768245034e-06, |
| "loss": 0.2975, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.03411296088244306, |
| "grad_norm": 0.36370596289634705, |
| "learning_rate": 7.446574961653317e-06, |
| "loss": 0.6164, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.0341421422005632, |
| "grad_norm": 0.33374112844467163, |
| "learning_rate": 7.442553074819054e-06, |
| "loss": 1.1079, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.034171323518683336, |
| "grad_norm": 0.3854160010814667, |
| "learning_rate": 7.438529111161925e-06, |
| "loss": 1.1514, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.03420050483680348, |
| "grad_norm": 0.40380972623825073, |
| "learning_rate": 7.434503074103374e-06, |
| "loss": 0.7734, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.03422968615492362, |
| "grad_norm": 0.3524898290634155, |
| "learning_rate": 7.430474967066608e-06, |
| "loss": 0.642, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.034258867473043755, |
| "grad_norm": 0.3827463984489441, |
| "learning_rate": 7.426444793476595e-06, |
| "loss": 0.5628, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.0342880487911639, |
| "grad_norm": 0.3349953889846802, |
| "learning_rate": 7.422412556760059e-06, |
| "loss": 1.0224, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.03431723010928404, |
| "grad_norm": 0.335231214761734, |
| "learning_rate": 7.4183782603454805e-06, |
| "loss": 1.1931, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.034346411427404175, |
| "grad_norm": 0.3460281789302826, |
| "learning_rate": 7.414341907663087e-06, |
| "loss": 1.1259, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.03437559274552431, |
| "grad_norm": 0.34443578124046326, |
| "learning_rate": 7.41030350214486e-06, |
| "loss": 0.6355, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.03440477406364446, |
| "grad_norm": 0.35307174921035767, |
| "learning_rate": 7.406263047224521e-06, |
| "loss": 1.1428, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.034433955381764594, |
| "grad_norm": 0.3559095561504364, |
| "learning_rate": 7.402220546337537e-06, |
| "loss": 0.5705, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.03446313669988473, |
| "grad_norm": 0.31162285804748535, |
| "learning_rate": 7.398176002921116e-06, |
| "loss": 1.2843, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.034492318018004876, |
| "grad_norm": 0.49262160062789917, |
| "learning_rate": 7.394129420414195e-06, |
| "loss": 0.6993, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.034521499336125014, |
| "grad_norm": 0.5153334140777588, |
| "learning_rate": 7.390080802257458e-06, |
| "loss": 0.6353, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.03455068065424515, |
| "grad_norm": 0.4173062741756439, |
| "learning_rate": 7.386030151893309e-06, |
| "loss": 0.9928, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.03457986197236529, |
| "grad_norm": 0.4189921021461487, |
| "learning_rate": 7.381977472765882e-06, |
| "loss": 0.7961, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.03460904329048543, |
| "grad_norm": 0.35358503460884094, |
| "learning_rate": 7.377922768321039e-06, |
| "loss": 0.5969, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.03463822460860557, |
| "grad_norm": 0.3556291460990906, |
| "learning_rate": 7.373866042006362e-06, |
| "loss": 0.8025, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.03466740592672571, |
| "grad_norm": 0.3133077025413513, |
| "learning_rate": 7.369807297271153e-06, |
| "loss": 0.4916, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.03469658724484585, |
| "grad_norm": 0.33922457695007324, |
| "learning_rate": 7.365746537566428e-06, |
| "loss": 1.1155, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.03472576856296599, |
| "grad_norm": 0.35871583223342896, |
| "learning_rate": 7.36168376634492e-06, |
| "loss": 0.7662, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.03475494988108613, |
| "grad_norm": 0.3863649070262909, |
| "learning_rate": 7.35761898706107e-06, |
| "loss": 0.7088, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.034784131199206265, |
| "grad_norm": 0.32206130027770996, |
| "learning_rate": 7.353552203171026e-06, |
| "loss": 0.9686, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.03481331251732641, |
| "grad_norm": 0.38681933283805847, |
| "learning_rate": 7.349483418132641e-06, |
| "loss": 1.1279, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.03484249383544655, |
| "grad_norm": 0.3887804448604584, |
| "learning_rate": 7.345412635405469e-06, |
| "loss": 0.8602, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.034871675153566685, |
| "grad_norm": 0.33923155069351196, |
| "learning_rate": 7.3413398584507645e-06, |
| "loss": 0.4758, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.03490085647168683, |
| "grad_norm": 0.40425780415534973, |
| "learning_rate": 7.337265090731476e-06, |
| "loss": 0.6499, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.03493003778980697, |
| "grad_norm": 0.3890778422355652, |
| "learning_rate": 7.333188335712245e-06, |
| "loss": 0.6889, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.034959219107927104, |
| "grad_norm": 0.346636027097702, |
| "learning_rate": 7.329109596859402e-06, |
| "loss": 0.5432, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.03498840042604724, |
| "grad_norm": 0.42355212569236755, |
| "learning_rate": 7.3250288776409666e-06, |
| "loss": 1.2113, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.035017581744167386, |
| "grad_norm": 0.40179261565208435, |
| "learning_rate": 7.32094618152664e-06, |
| "loss": 0.7849, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.035046763062287524, |
| "grad_norm": 0.33406075835227966, |
| "learning_rate": 7.316861511987803e-06, |
| "loss": 0.5403, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.03507594438040766, |
| "grad_norm": 0.4153609573841095, |
| "learning_rate": 7.312774872497519e-06, |
| "loss": 0.5786, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.035105125698527806, |
| "grad_norm": 0.36570999026298523, |
| "learning_rate": 7.308686266530524e-06, |
| "loss": 0.5352, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.035134307016647943, |
| "grad_norm": 0.34021544456481934, |
| "learning_rate": 7.304595697563221e-06, |
| "loss": 0.6123, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.03516348833476808, |
| "grad_norm": 0.417905330657959, |
| "learning_rate": 7.3005031690736925e-06, |
| "loss": 0.9984, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.03519266965288822, |
| "grad_norm": 0.29179444909095764, |
| "learning_rate": 7.29640868454168e-06, |
| "loss": 0.9578, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.03522185097100836, |
| "grad_norm": 0.3782234489917755, |
| "learning_rate": 7.2923122474485875e-06, |
| "loss": 0.7584, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.0352510322891285, |
| "grad_norm": 0.4105994701385498, |
| "learning_rate": 7.288213861277482e-06, |
| "loss": 1.002, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.03528021360724864, |
| "grad_norm": 0.3522428572177887, |
| "learning_rate": 7.284113529513089e-06, |
| "loss": 0.6361, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.035309394925368776, |
| "grad_norm": 0.3227940499782562, |
| "learning_rate": 7.280011255641784e-06, |
| "loss": 0.4874, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.03533857624348892, |
| "grad_norm": 0.3854133188724518, |
| "learning_rate": 7.275907043151598e-06, |
| "loss": 0.7347, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.03536775756160906, |
| "grad_norm": 0.4847533702850342, |
| "learning_rate": 7.271800895532205e-06, |
| "loss": 1.5612, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.035396938879729195, |
| "grad_norm": 0.35076403617858887, |
| "learning_rate": 7.267692816274931e-06, |
| "loss": 0.5814, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.03542612019784934, |
| "grad_norm": 0.39935195446014404, |
| "learning_rate": 7.263582808872741e-06, |
| "loss": 0.7169, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.03545530151596948, |
| "grad_norm": 0.45061102509498596, |
| "learning_rate": 7.259470876820238e-06, |
| "loss": 0.7671, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.035484482834089615, |
| "grad_norm": 0.3380295932292938, |
| "learning_rate": 7.255357023613665e-06, |
| "loss": 0.4438, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.03551366415220975, |
| "grad_norm": 0.3709303140640259, |
| "learning_rate": 7.251241252750896e-06, |
| "loss": 0.622, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.0355428454703299, |
| "grad_norm": 0.4267682433128357, |
| "learning_rate": 7.247123567731434e-06, |
| "loss": 1.131, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.035572026788450034, |
| "grad_norm": 0.4169882535934448, |
| "learning_rate": 7.243003972056415e-06, |
| "loss": 0.6889, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.03560120810657017, |
| "grad_norm": 0.3335872292518616, |
| "learning_rate": 7.238882469228594e-06, |
| "loss": 0.5301, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.035630389424690316, |
| "grad_norm": 0.3823302090167999, |
| "learning_rate": 7.234759062752352e-06, |
| "loss": 0.5729, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.035659570742810454, |
| "grad_norm": 0.3532765805721283, |
| "learning_rate": 7.230633756133684e-06, |
| "loss": 0.5229, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.03568875206093059, |
| "grad_norm": 0.36836642026901245, |
| "learning_rate": 7.226506552880204e-06, |
| "loss": 0.5312, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.03571793337905073, |
| "grad_norm": 0.3310144543647766, |
| "learning_rate": 7.222377456501139e-06, |
| "loss": 0.4995, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.03574711469717087, |
| "grad_norm": 0.37875980138778687, |
| "learning_rate": 7.218246470507322e-06, |
| "loss": 0.565, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.03577629601529101, |
| "grad_norm": 0.4794166088104248, |
| "learning_rate": 7.214113598411197e-06, |
| "loss": 1.2709, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.03580547733341115, |
| "grad_norm": 0.3511941432952881, |
| "learning_rate": 7.209978843726809e-06, |
| "loss": 0.4233, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.03583465865153129, |
| "grad_norm": 0.338687002658844, |
| "learning_rate": 7.205842209969806e-06, |
| "loss": 0.5124, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.03586383996965143, |
| "grad_norm": 0.44972458481788635, |
| "learning_rate": 7.201703700657428e-06, |
| "loss": 0.4769, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.03589302128777157, |
| "grad_norm": 0.3388533890247345, |
| "learning_rate": 7.197563319308518e-06, |
| "loss": 0.582, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.035922202605891705, |
| "grad_norm": 0.38155123591423035, |
| "learning_rate": 7.193421069443507e-06, |
| "loss": 1.281, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.03595138392401185, |
| "grad_norm": 0.4024869501590729, |
| "learning_rate": 7.1892769545844095e-06, |
| "loss": 0.564, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.03598056524213199, |
| "grad_norm": 0.3802089989185333, |
| "learning_rate": 7.185130978254836e-06, |
| "loss": 1.0339, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.036009746560252125, |
| "grad_norm": 0.34473690390586853, |
| "learning_rate": 7.180983143979971e-06, |
| "loss": 0.9934, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.03603892787837227, |
| "grad_norm": 0.3550906777381897, |
| "learning_rate": 7.176833455286584e-06, |
| "loss": 0.5747, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.03606810919649241, |
| "grad_norm": 0.3604385256767273, |
| "learning_rate": 7.172681915703019e-06, |
| "loss": 0.4651, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.036097290514612544, |
| "grad_norm": 0.44548967480659485, |
| "learning_rate": 7.168528528759193e-06, |
| "loss": 1.3551, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.03612647183273268, |
| "grad_norm": 0.41413599252700806, |
| "learning_rate": 7.164373297986595e-06, |
| "loss": 0.5039, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.036155653150852826, |
| "grad_norm": 0.31594792008399963, |
| "learning_rate": 7.160216226918283e-06, |
| "loss": 0.4698, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.036184834468972964, |
| "grad_norm": 0.35538673400878906, |
| "learning_rate": 7.156057319088877e-06, |
| "loss": 0.5736, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.0362140157870931, |
| "grad_norm": 0.3683064579963684, |
| "learning_rate": 7.151896578034561e-06, |
| "loss": 0.5892, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.036243197105213246, |
| "grad_norm": 0.46832773089408875, |
| "learning_rate": 7.147734007293076e-06, |
| "loss": 0.6886, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.03627237842333338, |
| "grad_norm": 0.43364712595939636, |
| "learning_rate": 7.1435696104037215e-06, |
| "loss": 0.7977, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.03630155974145352, |
| "grad_norm": 0.4274609088897705, |
| "learning_rate": 7.139403390907345e-06, |
| "loss": 0.8793, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.03633074105957366, |
| "grad_norm": 0.398817777633667, |
| "learning_rate": 7.135235352346347e-06, |
| "loss": 1.8894, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.0363599223776938, |
| "grad_norm": 0.568053126335144, |
| "learning_rate": 7.131065498264675e-06, |
| "loss": 1.1279, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.03638910369581394, |
| "grad_norm": 0.37247219681739807, |
| "learning_rate": 7.12689383220782e-06, |
| "loss": 1.0723, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.03641828501393408, |
| "grad_norm": 0.3967159688472748, |
| "learning_rate": 7.122720357722812e-06, |
| "loss": 0.7578, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.03644746633205422, |
| "grad_norm": 0.39001792669296265, |
| "learning_rate": 7.11854507835822e-06, |
| "loss": 0.9283, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.03647664765017436, |
| "grad_norm": 0.3803122937679291, |
| "learning_rate": 7.1143679976641465e-06, |
| "loss": 1.0353, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.0365058289682945, |
| "grad_norm": 0.4342098832130432, |
| "learning_rate": 7.1101891191922275e-06, |
| "loss": 1.0964, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.036535010286414635, |
| "grad_norm": 0.3213995397090912, |
| "learning_rate": 7.106008446495627e-06, |
| "loss": 0.4175, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.03656419160453478, |
| "grad_norm": 0.3426913022994995, |
| "learning_rate": 7.101825983129031e-06, |
| "loss": 0.4272, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.03659337292265492, |
| "grad_norm": 0.4621524512767792, |
| "learning_rate": 7.097641732648653e-06, |
| "loss": 0.5503, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.036622554240775054, |
| "grad_norm": 0.3674916625022888, |
| "learning_rate": 7.093455698612227e-06, |
| "loss": 0.6504, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.0366517355588952, |
| "grad_norm": 0.3693106472492218, |
| "learning_rate": 7.089267884578995e-06, |
| "loss": 0.6054, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.036680916877015336, |
| "grad_norm": 0.3468042314052582, |
| "learning_rate": 7.085078294109722e-06, |
| "loss": 1.0761, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.036710098195135474, |
| "grad_norm": 0.3826884329319, |
| "learning_rate": 7.080886930766679e-06, |
| "loss": 1.1468, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.03673927951325561, |
| "grad_norm": 0.5449230670928955, |
| "learning_rate": 7.076693798113643e-06, |
| "loss": 1.5713, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.036768460831375756, |
| "grad_norm": 0.35776931047439575, |
| "learning_rate": 7.072498899715899e-06, |
| "loss": 1.135, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.03679764214949589, |
| "grad_norm": 0.3925883173942566, |
| "learning_rate": 7.068302239140232e-06, |
| "loss": 0.6657, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.03682682346761603, |
| "grad_norm": 0.3655458688735962, |
| "learning_rate": 7.064103819954925e-06, |
| "loss": 0.4577, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.03685600478573617, |
| "grad_norm": 0.39803630113601685, |
| "learning_rate": 7.059903645729753e-06, |
| "loss": 1.2144, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.03688518610385631, |
| "grad_norm": 0.4239751994609833, |
| "learning_rate": 7.0557017200359925e-06, |
| "loss": 1.0181, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.03691436742197645, |
| "grad_norm": 0.38729363679885864, |
| "learning_rate": 7.051498046446399e-06, |
| "loss": 0.9135, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.03694354874009659, |
| "grad_norm": 0.3446405827999115, |
| "learning_rate": 7.047292628535219e-06, |
| "loss": 0.4713, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.03697273005821673, |
| "grad_norm": 0.36260929703712463, |
| "learning_rate": 7.043085469878183e-06, |
| "loss": 1.1579, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.03700191137633687, |
| "grad_norm": 0.3197498023509979, |
| "learning_rate": 7.038876574052498e-06, |
| "loss": 0.3499, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.03703109269445701, |
| "grad_norm": 0.3071781098842621, |
| "learning_rate": 7.034665944636853e-06, |
| "loss": 0.526, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.037060274012577145, |
| "grad_norm": 0.3225739002227783, |
| "learning_rate": 7.030453585211405e-06, |
| "loss": 1.1386, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.03708945533069729, |
| "grad_norm": 0.37676727771759033, |
| "learning_rate": 7.0262394993577875e-06, |
| "loss": 0.7368, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.03711863664881743, |
| "grad_norm": 0.46438324451446533, |
| "learning_rate": 7.022023690659101e-06, |
| "loss": 1.753, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.037147817966937564, |
| "grad_norm": 0.35239091515541077, |
| "learning_rate": 7.017806162699905e-06, |
| "loss": 0.5179, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.03717699928505771, |
| "grad_norm": 0.34967631101608276, |
| "learning_rate": 7.013586919066231e-06, |
| "loss": 0.6739, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.037206180603177846, |
| "grad_norm": 0.33838579058647156, |
| "learning_rate": 7.009365963345558e-06, |
| "loss": 0.5408, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.037235361921297984, |
| "grad_norm": 0.4194817543029785, |
| "learning_rate": 7.005143299126831e-06, |
| "loss": 0.555, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.03726454323941812, |
| "grad_norm": 0.31437215209007263, |
| "learning_rate": 7.000918930000441e-06, |
| "loss": 0.494, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.037293724557538266, |
| "grad_norm": 0.4067825973033905, |
| "learning_rate": 6.996692859558229e-06, |
| "loss": 1.2538, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.0373229058756584, |
| "grad_norm": 0.3817364573478699, |
| "learning_rate": 6.9924650913934875e-06, |
| "loss": 0.8129, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.03735208719377854, |
| "grad_norm": 0.3235669732093811, |
| "learning_rate": 6.9882356291009475e-06, |
| "loss": 0.5634, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.037381268511898685, |
| "grad_norm": 0.583557665348053, |
| "learning_rate": 6.9840044762767825e-06, |
| "loss": 1.7441, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.03741044983001882, |
| "grad_norm": 0.31337982416152954, |
| "learning_rate": 6.979771636518604e-06, |
| "loss": 0.446, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.03743963114813896, |
| "grad_norm": 0.4196406900882721, |
| "learning_rate": 6.975537113425457e-06, |
| "loss": 0.7375, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.0374688124662591, |
| "grad_norm": 0.37400898337364197, |
| "learning_rate": 6.9713009105978144e-06, |
| "loss": 0.7989, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.03749799378437924, |
| "grad_norm": 0.436960369348526, |
| "learning_rate": 6.967063031637586e-06, |
| "loss": 0.8025, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.03752717510249938, |
| "grad_norm": 0.36592885851860046, |
| "learning_rate": 6.962823480148098e-06, |
| "loss": 0.4142, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.03755635642061952, |
| "grad_norm": 0.36234551668167114, |
| "learning_rate": 6.958582259734101e-06, |
| "loss": 0.614, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.03758553773873966, |
| "grad_norm": 0.3413221538066864, |
| "learning_rate": 6.954339374001771e-06, |
| "loss": 1.1503, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.0376147190568598, |
| "grad_norm": 0.5283128023147583, |
| "learning_rate": 6.950094826558689e-06, |
| "loss": 0.89, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.03764390037497994, |
| "grad_norm": 0.4031014144420624, |
| "learning_rate": 6.945848621013857e-06, |
| "loss": 0.7177, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.037673081693100074, |
| "grad_norm": 0.32361945509910583, |
| "learning_rate": 6.941600760977686e-06, |
| "loss": 0.436, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.03770226301122022, |
| "grad_norm": 0.5514517426490784, |
| "learning_rate": 6.93735125006199e-06, |
| "loss": 1.769, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.037731444329340356, |
| "grad_norm": 0.3868471086025238, |
| "learning_rate": 6.93310009187999e-06, |
| "loss": 0.666, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.037760625647460494, |
| "grad_norm": 0.4755808413028717, |
| "learning_rate": 6.928847290046306e-06, |
| "loss": 0.529, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.03778980696558064, |
| "grad_norm": 0.5227566957473755, |
| "learning_rate": 6.924592848176958e-06, |
| "loss": 0.818, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.037818988283700776, |
| "grad_norm": 0.33742743730545044, |
| "learning_rate": 6.920336769889355e-06, |
| "loss": 0.5325, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.037848169601820913, |
| "grad_norm": 0.42309924960136414, |
| "learning_rate": 6.916079058802303e-06, |
| "loss": 0.5331, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.03787735091994105, |
| "grad_norm": 0.370309978723526, |
| "learning_rate": 6.911819718535995e-06, |
| "loss": 1.2243, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.037906532238061195, |
| "grad_norm": 0.4266452491283417, |
| "learning_rate": 6.907558752712007e-06, |
| "loss": 1.3171, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.03793571355618133, |
| "grad_norm": 0.3490223288536072, |
| "learning_rate": 6.903296164953299e-06, |
| "loss": 0.8973, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.03796489487430147, |
| "grad_norm": 0.37266805768013, |
| "learning_rate": 6.899031958884211e-06, |
| "loss": 0.9855, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.037994076192421615, |
| "grad_norm": 0.5146600604057312, |
| "learning_rate": 6.894766138130458e-06, |
| "loss": 1.0542, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.03802325751054175, |
| "grad_norm": 0.3761359453201294, |
| "learning_rate": 6.890498706319123e-06, |
| "loss": 0.6197, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.03805243882866189, |
| "grad_norm": 0.4903143048286438, |
| "learning_rate": 6.8862296670786686e-06, |
| "loss": 0.8525, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.03808162014678203, |
| "grad_norm": 0.3612405061721802, |
| "learning_rate": 6.8819590240389186e-06, |
| "loss": 0.5752, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.03811080146490217, |
| "grad_norm": 0.45921143889427185, |
| "learning_rate": 6.877686780831058e-06, |
| "loss": 0.8256, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.03813998278302231, |
| "grad_norm": 0.3330605924129486, |
| "learning_rate": 6.873412941087639e-06, |
| "loss": 1.1883, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.03816916410114245, |
| "grad_norm": 0.35477909445762634, |
| "learning_rate": 6.869137508442563e-06, |
| "loss": 0.6121, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.03819834541926259, |
| "grad_norm": 0.3433743417263031, |
| "learning_rate": 6.864860486531092e-06, |
| "loss": 1.1423, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.03822752673738273, |
| "grad_norm": 0.3879603147506714, |
| "learning_rate": 6.860581878989841e-06, |
| "loss": 1.1482, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.03825670805550287, |
| "grad_norm": 0.36910051107406616, |
| "learning_rate": 6.8563016894567655e-06, |
| "loss": 0.425, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.038285889373623004, |
| "grad_norm": 0.30039140582084656, |
| "learning_rate": 6.852019921571172e-06, |
| "loss": 0.7802, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.03831507069174315, |
| "grad_norm": 0.39489609003067017, |
| "learning_rate": 6.8477365789737084e-06, |
| "loss": 0.683, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.038344252009863286, |
| "grad_norm": 0.3789381980895996, |
| "learning_rate": 6.84345166530636e-06, |
| "loss": 0.933, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.038373433327983424, |
| "grad_norm": 0.37322449684143066, |
| "learning_rate": 6.8391651842124465e-06, |
| "loss": 0.8413, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.03840261464610356, |
| "grad_norm": 0.34380072355270386, |
| "learning_rate": 6.834877139336627e-06, |
| "loss": 1.2004, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.038431795964223706, |
| "grad_norm": 0.4074583351612091, |
| "learning_rate": 6.8305875343248805e-06, |
| "loss": 0.7647, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.03846097728234384, |
| "grad_norm": 0.47075557708740234, |
| "learning_rate": 6.82629637282452e-06, |
| "loss": 0.869, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.03849015860046398, |
| "grad_norm": 0.4043753147125244, |
| "learning_rate": 6.82200365848418e-06, |
| "loss": 0.9235, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.038519339918584125, |
| "grad_norm": 0.39343154430389404, |
| "learning_rate": 6.817709394953815e-06, |
| "loss": 0.6693, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.03854852123670426, |
| "grad_norm": 0.3302392065525055, |
| "learning_rate": 6.813413585884695e-06, |
| "loss": 1.0758, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.0385777025548244, |
| "grad_norm": 0.38957786560058594, |
| "learning_rate": 6.809116234929407e-06, |
| "loss": 1.0992, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.03860688387294454, |
| "grad_norm": 0.4037785828113556, |
| "learning_rate": 6.804817345741848e-06, |
| "loss": 1.2897, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.03863606519106468, |
| "grad_norm": 0.4564575254917145, |
| "learning_rate": 6.8005169219772204e-06, |
| "loss": 0.7094, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.03866524650918482, |
| "grad_norm": 0.4328705668449402, |
| "learning_rate": 6.796214967292037e-06, |
| "loss": 1.1438, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.03869442782730496, |
| "grad_norm": 0.40293389558792114, |
| "learning_rate": 6.791911485344106e-06, |
| "loss": 1.175, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.0387236091454251, |
| "grad_norm": 0.36415475606918335, |
| "learning_rate": 6.787606479792539e-06, |
| "loss": 0.7468, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.03875279046354524, |
| "grad_norm": 0.36846107244491577, |
| "learning_rate": 6.783299954297739e-06, |
| "loss": 0.7489, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.03878197178166538, |
| "grad_norm": 0.3493446111679077, |
| "learning_rate": 6.778991912521409e-06, |
| "loss": 0.5722, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.038811153099785514, |
| "grad_norm": 0.3452781140804291, |
| "learning_rate": 6.774682358126528e-06, |
| "loss": 1.1173, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.03884033441790566, |
| "grad_norm": 0.3552137315273285, |
| "learning_rate": 6.7703712947773745e-06, |
| "loss": 0.5815, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.038869515736025796, |
| "grad_norm": 0.34995153546333313, |
| "learning_rate": 6.766058726139504e-06, |
| "loss": 0.4325, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.038898697054145934, |
| "grad_norm": 0.3551886975765228, |
| "learning_rate": 6.7617446558797495e-06, |
| "loss": 0.5186, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.03892787837226608, |
| "grad_norm": 0.3237588703632355, |
| "learning_rate": 6.757429087666225e-06, |
| "loss": 0.5527, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.038957059690386216, |
| "grad_norm": 0.3702009916305542, |
| "learning_rate": 6.753112025168318e-06, |
| "loss": 1.0722, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.03898624100850635, |
| "grad_norm": 0.3234436810016632, |
| "learning_rate": 6.748793472056685e-06, |
| "loss": 1.1065, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.03901542232662649, |
| "grad_norm": 0.36116692423820496, |
| "learning_rate": 6.744473432003246e-06, |
| "loss": 1.2237, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.039044603644746635, |
| "grad_norm": 0.33229708671569824, |
| "learning_rate": 6.740151908681198e-06, |
| "loss": 0.4987, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.03907378496286677, |
| "grad_norm": 0.3865174949169159, |
| "learning_rate": 6.735828905764984e-06, |
| "loss": 0.5901, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.03910296628098691, |
| "grad_norm": 0.333687961101532, |
| "learning_rate": 6.731504426930313e-06, |
| "loss": 1.103, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.039132147599107055, |
| "grad_norm": 0.4775073230266571, |
| "learning_rate": 6.727178475854149e-06, |
| "loss": 0.5692, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.03916132891722719, |
| "grad_norm": 0.3877047002315521, |
| "learning_rate": 6.722851056214705e-06, |
| "loss": 0.7231, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.03919051023534733, |
| "grad_norm": 0.36615628004074097, |
| "learning_rate": 6.718522171691446e-06, |
| "loss": 1.0698, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.03921969155346747, |
| "grad_norm": 0.36412593722343445, |
| "learning_rate": 6.714191825965079e-06, |
| "loss": 0.6699, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.03924887287158761, |
| "grad_norm": 0.48308536410331726, |
| "learning_rate": 6.709860022717556e-06, |
| "loss": 0.715, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.03927805418970775, |
| "grad_norm": 0.29396340250968933, |
| "learning_rate": 6.705526765632068e-06, |
| "loss": 0.3743, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.03930723550782789, |
| "grad_norm": 0.38491395115852356, |
| "learning_rate": 6.701192058393039e-06, |
| "loss": 0.6031, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.03933641682594803, |
| "grad_norm": 0.3948901295661926, |
| "learning_rate": 6.696855904686133e-06, |
| "loss": 1.3799, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.03936559814406817, |
| "grad_norm": 0.4337192475795746, |
| "learning_rate": 6.6925183081982335e-06, |
| "loss": 1.599, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.039394779462188306, |
| "grad_norm": 0.45100483298301697, |
| "learning_rate": 6.688179272617462e-06, |
| "loss": 0.4357, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.039423960780308444, |
| "grad_norm": 0.34771525859832764, |
| "learning_rate": 6.683838801633157e-06, |
| "loss": 1.1212, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.03945314209842859, |
| "grad_norm": 0.35747215151786804, |
| "learning_rate": 6.679496898935875e-06, |
| "loss": 0.5497, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.039482323416548726, |
| "grad_norm": 0.3778969943523407, |
| "learning_rate": 6.6751535682174e-06, |
| "loss": 1.0852, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.03951150473466886, |
| "grad_norm": 0.4144923985004425, |
| "learning_rate": 6.67080881317072e-06, |
| "loss": 0.732, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.03954068605278901, |
| "grad_norm": 0.5575356483459473, |
| "learning_rate": 6.666462637490037e-06, |
| "loss": 0.7488, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.039569867370909145, |
| "grad_norm": 0.3612348139286041, |
| "learning_rate": 6.6621150448707635e-06, |
| "loss": 0.5648, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.03959904868902928, |
| "grad_norm": 0.3603819012641907, |
| "learning_rate": 6.657766039009517e-06, |
| "loss": 0.5573, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.03962823000714942, |
| "grad_norm": 0.41484981775283813, |
| "learning_rate": 6.653415623604109e-06, |
| "loss": 1.589, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.039657411325269565, |
| "grad_norm": 0.3955146074295044, |
| "learning_rate": 6.649063802353563e-06, |
| "loss": 1.2515, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.0396865926433897, |
| "grad_norm": 0.35163095593452454, |
| "learning_rate": 6.6447105789580825e-06, |
| "loss": 0.6892, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.03971577396150984, |
| "grad_norm": 0.3816877007484436, |
| "learning_rate": 6.640355957119075e-06, |
| "loss": 0.6176, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.039744955279629984, |
| "grad_norm": 0.3277455270290375, |
| "learning_rate": 6.635999940539133e-06, |
| "loss": 0.5275, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.03977413659775012, |
| "grad_norm": 0.4182659983634949, |
| "learning_rate": 6.6316425329220325e-06, |
| "loss": 0.9713, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.03980331791587026, |
| "grad_norm": 0.394196093082428, |
| "learning_rate": 6.627283737972735e-06, |
| "loss": 1.2857, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.0398324992339904, |
| "grad_norm": 1.2104586362838745, |
| "learning_rate": 6.622923559397383e-06, |
| "loss": 0.4741, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.03986168055211054, |
| "grad_norm": 0.3945883810520172, |
| "learning_rate": 6.61856200090329e-06, |
| "loss": 0.9591, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.03989086187023068, |
| "grad_norm": 0.31628480553627014, |
| "learning_rate": 6.614199066198951e-06, |
| "loss": 0.3501, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.039920043188350816, |
| "grad_norm": 0.3783208131790161, |
| "learning_rate": 6.609834758994023e-06, |
| "loss": 1.3652, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.039949224506470954, |
| "grad_norm": 0.367964506149292, |
| "learning_rate": 6.605469082999333e-06, |
| "loss": 0.6027, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.0399784058245911, |
| "grad_norm": 0.3827815055847168, |
| "learning_rate": 6.601102041926875e-06, |
| "loss": 0.5882, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.040007587142711236, |
| "grad_norm": 0.3371301293373108, |
| "learning_rate": 6.596733639489798e-06, |
| "loss": 1.1528, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.04003676846083137, |
| "grad_norm": 0.3957504630088806, |
| "learning_rate": 6.592363879402415e-06, |
| "loss": 0.6925, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.04006594977895152, |
| "grad_norm": 0.3958311378955841, |
| "learning_rate": 6.5879927653801875e-06, |
| "loss": 1.181, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.040095131097071655, |
| "grad_norm": 0.3562775254249573, |
| "learning_rate": 6.583620301139731e-06, |
| "loss": 0.6058, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.04012431241519179, |
| "grad_norm": 0.4353269338607788, |
| "learning_rate": 6.579246490398807e-06, |
| "loss": 0.8798, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.04015349373331193, |
| "grad_norm": 0.35366541147232056, |
| "learning_rate": 6.574871336876328e-06, |
| "loss": 0.5737, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.040182675051432075, |
| "grad_norm": 0.42591527104377747, |
| "learning_rate": 6.5704948442923386e-06, |
| "loss": 1.1687, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.04021185636955221, |
| "grad_norm": 0.4234766662120819, |
| "learning_rate": 6.5661170163680295e-06, |
| "loss": 1.9626, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.04024103768767235, |
| "grad_norm": 0.39508822560310364, |
| "learning_rate": 6.561737856825726e-06, |
| "loss": 1.6789, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.040270219005792494, |
| "grad_norm": 0.3649897575378418, |
| "learning_rate": 6.55735736938888e-06, |
| "loss": 0.5639, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.04029940032391263, |
| "grad_norm": 0.3846116065979004, |
| "learning_rate": 6.552975557782081e-06, |
| "loss": 0.5498, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.04032858164203277, |
| "grad_norm": 0.3876710832118988, |
| "learning_rate": 6.548592425731035e-06, |
| "loss": 0.5363, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.04035776296015291, |
| "grad_norm": 0.34960341453552246, |
| "learning_rate": 6.5442079769625785e-06, |
| "loss": 0.6119, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.04038694427827305, |
| "grad_norm": 0.3467157781124115, |
| "learning_rate": 6.539822215204666e-06, |
| "loss": 1.1284, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.04041612559639319, |
| "grad_norm": 0.4058839678764343, |
| "learning_rate": 6.535435144186365e-06, |
| "loss": 1.0208, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.040445306914513326, |
| "grad_norm": 0.4504760801792145, |
| "learning_rate": 6.531046767637856e-06, |
| "loss": 0.5742, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.04047448823263347, |
| "grad_norm": 0.493252158164978, |
| "learning_rate": 6.526657089290438e-06, |
| "loss": 0.5616, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.04050366955075361, |
| "grad_norm": 0.4324471950531006, |
| "learning_rate": 6.5222661128765056e-06, |
| "loss": 1.1836, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.040532850868873746, |
| "grad_norm": 0.3789842128753662, |
| "learning_rate": 6.517873842129563e-06, |
| "loss": 1.1208, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.040562032186993884, |
| "grad_norm": 0.4028272032737732, |
| "learning_rate": 6.513480280784217e-06, |
| "loss": 0.5723, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.04059121350511403, |
| "grad_norm": 0.3676299750804901, |
| "learning_rate": 6.509085432576165e-06, |
| "loss": 0.6375, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.040620394823234166, |
| "grad_norm": 0.42374491691589355, |
| "learning_rate": 6.504689301242204e-06, |
| "loss": 0.8473, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.0406495761413543, |
| "grad_norm": 0.3679426312446594, |
| "learning_rate": 6.500291890520222e-06, |
| "loss": 0.9418, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.04067875745947445, |
| "grad_norm": 0.3320949077606201, |
| "learning_rate": 6.495893204149191e-06, |
| "loss": 0.4823, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.040707938777594585, |
| "grad_norm": 0.6323228478431702, |
| "learning_rate": 6.491493245869171e-06, |
| "loss": 0.8586, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.04073712009571472, |
| "grad_norm": 0.3451409339904785, |
| "learning_rate": 6.487092019421302e-06, |
| "loss": 0.4333, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.04076630141383486, |
| "grad_norm": 0.4376499056816101, |
| "learning_rate": 6.482689528547804e-06, |
| "loss": 0.4691, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.040795482731955005, |
| "grad_norm": 0.3368481397628784, |
| "learning_rate": 6.478285776991971e-06, |
| "loss": 0.4143, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.04082466405007514, |
| "grad_norm": 0.40253746509552, |
| "learning_rate": 6.473880768498164e-06, |
| "loss": 1.0729, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.04085384536819528, |
| "grad_norm": 0.36722445487976074, |
| "learning_rate": 6.469474506811824e-06, |
| "loss": 0.5995, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.040883026686315424, |
| "grad_norm": 0.3319557309150696, |
| "learning_rate": 6.465066995679446e-06, |
| "loss": 1.1651, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.04091220800443556, |
| "grad_norm": 0.48606428503990173, |
| "learning_rate": 6.460658238848594e-06, |
| "loss": 0.9981, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.0409413893225557, |
| "grad_norm": 0.40726184844970703, |
| "learning_rate": 6.456248240067892e-06, |
| "loss": 0.6836, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.04097057064067584, |
| "grad_norm": 0.4394870698451996, |
| "learning_rate": 6.4518370030870134e-06, |
| "loss": 1.0507, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.04099975195879598, |
| "grad_norm": 0.39917951822280884, |
| "learning_rate": 6.44742453165669e-06, |
| "loss": 0.6702, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.04102893327691612, |
| "grad_norm": 0.4328409433364868, |
| "learning_rate": 6.443010829528703e-06, |
| "loss": 0.6955, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.041058114595036256, |
| "grad_norm": 0.35211169719696045, |
| "learning_rate": 6.438595900455877e-06, |
| "loss": 0.6292, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.0410872959131564, |
| "grad_norm": 0.3609704077243805, |
| "learning_rate": 6.434179748192082e-06, |
| "loss": 0.7085, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.04111647723127654, |
| "grad_norm": 0.35612180829048157, |
| "learning_rate": 6.429762376492229e-06, |
| "loss": 0.9625, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.041145658549396676, |
| "grad_norm": 0.423085480928421, |
| "learning_rate": 6.425343789112263e-06, |
| "loss": 0.7932, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.04117483986751681, |
| "grad_norm": 0.35695165395736694, |
| "learning_rate": 6.420923989809163e-06, |
| "loss": 0.4893, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.04120402118563696, |
| "grad_norm": 0.3439449071884155, |
| "learning_rate": 6.416502982340944e-06, |
| "loss": 0.5134, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.041233202503757095, |
| "grad_norm": 0.4308573603630066, |
| "learning_rate": 6.412080770466638e-06, |
| "loss": 0.7367, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.04126238382187723, |
| "grad_norm": 0.37188485264778137, |
| "learning_rate": 6.407657357946312e-06, |
| "loss": 1.2488, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.04129156513999738, |
| "grad_norm": 0.4908686876296997, |
| "learning_rate": 6.403232748541046e-06, |
| "loss": 0.9023, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.041320746458117515, |
| "grad_norm": 0.3698756694793701, |
| "learning_rate": 6.39880694601294e-06, |
| "loss": 0.5699, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.04134992777623765, |
| "grad_norm": 0.45328912138938904, |
| "learning_rate": 6.39437995412511e-06, |
| "loss": 1.054, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.04137910909435779, |
| "grad_norm": 0.4654984772205353, |
| "learning_rate": 6.389951776641683e-06, |
| "loss": 1.2852, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.041408290412477934, |
| "grad_norm": 0.31282249093055725, |
| "learning_rate": 6.385522417327792e-06, |
| "loss": 0.4093, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.04143747173059807, |
| "grad_norm": 0.4321572482585907, |
| "learning_rate": 6.381091879949576e-06, |
| "loss": 1.0905, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.04146665304871821, |
| "grad_norm": 0.4232565760612488, |
| "learning_rate": 6.376660168274176e-06, |
| "loss": 0.9107, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.04149583436683835, |
| "grad_norm": 0.4155007302761078, |
| "learning_rate": 6.3722272860697335e-06, |
| "loss": 1.1674, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.04152501568495849, |
| "grad_norm": 0.35389575362205505, |
| "learning_rate": 6.367793237105378e-06, |
| "loss": 0.4146, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.04155419700307863, |
| "grad_norm": 0.3538707494735718, |
| "learning_rate": 6.363358025151241e-06, |
| "loss": 0.5222, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.041583378321198766, |
| "grad_norm": 0.30130571126937866, |
| "learning_rate": 6.358921653978436e-06, |
| "loss": 0.3255, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.04161255963931891, |
| "grad_norm": 0.3739508092403412, |
| "learning_rate": 6.354484127359062e-06, |
| "loss": 1.1239, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.04164174095743905, |
| "grad_norm": 0.34718695282936096, |
| "learning_rate": 6.350045449066207e-06, |
| "loss": 0.6708, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.041670922275559186, |
| "grad_norm": 0.40969929099082947, |
| "learning_rate": 6.3456056228739315e-06, |
| "loss": 0.8285, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.04170010359367932, |
| "grad_norm": 0.5399494767189026, |
| "learning_rate": 6.341164652557272e-06, |
| "loss": 0.4426, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.04172928491179947, |
| "grad_norm": 0.43993720412254333, |
| "learning_rate": 6.336722541892244e-06, |
| "loss": 0.7278, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.041758466229919605, |
| "grad_norm": 0.3477911353111267, |
| "learning_rate": 6.332279294655828e-06, |
| "loss": 0.573, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.04178764754803974, |
| "grad_norm": 0.44330674409866333, |
| "learning_rate": 6.3278349146259675e-06, |
| "loss": 1.1454, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.04181682886615989, |
| "grad_norm": 0.4085425138473511, |
| "learning_rate": 6.3233894055815795e-06, |
| "loss": 0.6069, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.041846010184280025, |
| "grad_norm": 0.3401845097541809, |
| "learning_rate": 6.31894277130253e-06, |
| "loss": 0.4646, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.04187519150240016, |
| "grad_norm": 0.3752526044845581, |
| "learning_rate": 6.314495015569647e-06, |
| "loss": 1.2853, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.0419043728205203, |
| "grad_norm": 0.3284650444984436, |
| "learning_rate": 6.310046142164713e-06, |
| "loss": 0.4226, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.041933554138640444, |
| "grad_norm": 0.39148062467575073, |
| "learning_rate": 6.305596154870459e-06, |
| "loss": 0.5152, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.04196273545676058, |
| "grad_norm": 0.4285087287425995, |
| "learning_rate": 6.301145057470563e-06, |
| "loss": 1.0063, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.04199191677488072, |
| "grad_norm": 0.337423175573349, |
| "learning_rate": 6.296692853749648e-06, |
| "loss": 0.4357, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.042021098093000864, |
| "grad_norm": 0.3672419488430023, |
| "learning_rate": 6.2922395474932765e-06, |
| "loss": 0.6746, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.042050279411121, |
| "grad_norm": 0.35064423084259033, |
| "learning_rate": 6.287785142487948e-06, |
| "loss": 1.2374, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.04207946072924114, |
| "grad_norm": 0.3246432840824127, |
| "learning_rate": 6.283329642521099e-06, |
| "loss": 0.7072, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.042108642047361276, |
| "grad_norm": 0.37220683693885803, |
| "learning_rate": 6.2788730513810936e-06, |
| "loss": 0.7413, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.04213782336548142, |
| "grad_norm": 0.42576122283935547, |
| "learning_rate": 6.274415372857225e-06, |
| "loss": 0.9966, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.04216700468360156, |
| "grad_norm": 0.3823949992656708, |
| "learning_rate": 6.269956610739714e-06, |
| "loss": 0.4135, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.042196186001721696, |
| "grad_norm": 0.3482106328010559, |
| "learning_rate": 6.265496768819696e-06, |
| "loss": 0.6115, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.04222536731984184, |
| "grad_norm": 0.38117367029190063, |
| "learning_rate": 6.261035850889235e-06, |
| "loss": 0.7127, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.04225454863796198, |
| "grad_norm": 0.3146565854549408, |
| "learning_rate": 6.2565738607412944e-06, |
| "loss": 0.4629, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.042283729956082115, |
| "grad_norm": 0.34732186794281006, |
| "learning_rate": 6.252110802169764e-06, |
| "loss": 0.5771, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.04231291127420225, |
| "grad_norm": 0.573706865310669, |
| "learning_rate": 6.247646678969435e-06, |
| "loss": 1.0432, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.0423420925923224, |
| "grad_norm": 0.34539684653282166, |
| "learning_rate": 6.243181494936006e-06, |
| "loss": 0.4741, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.042371273910442535, |
| "grad_norm": 0.3698411285877228, |
| "learning_rate": 6.2387152538660745e-06, |
| "loss": 0.6515, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.04240045522856267, |
| "grad_norm": 0.37392696738243103, |
| "learning_rate": 6.234247959557142e-06, |
| "loss": 1.5626, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.04242963654668282, |
| "grad_norm": 0.4814490079879761, |
| "learning_rate": 6.2297796158076005e-06, |
| "loss": 0.6886, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.042458817864802954, |
| "grad_norm": 0.3873136341571808, |
| "learning_rate": 6.225310226416738e-06, |
| "loss": 0.7157, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.04248799918292309, |
| "grad_norm": 0.31437286734580994, |
| "learning_rate": 6.220839795184726e-06, |
| "loss": 0.3766, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.04251718050104323, |
| "grad_norm": 0.39689576625823975, |
| "learning_rate": 6.216368325912629e-06, |
| "loss": 0.6615, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.042546361819163374, |
| "grad_norm": 0.373020201921463, |
| "learning_rate": 6.2118958224023915e-06, |
| "loss": 0.5536, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.04257554313728351, |
| "grad_norm": 0.3472708761692047, |
| "learning_rate": 6.207422288456836e-06, |
| "loss": 1.0156, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.04260472445540365, |
| "grad_norm": 0.37166836857795715, |
| "learning_rate": 6.202947727879658e-06, |
| "loss": 0.7258, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.04263390577352379, |
| "grad_norm": 0.36742669343948364, |
| "learning_rate": 6.198472144475437e-06, |
| "loss": 0.6225, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.04266308709164393, |
| "grad_norm": 0.39598309993743896, |
| "learning_rate": 6.193995542049609e-06, |
| "loss": 0.6949, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.04269226840976407, |
| "grad_norm": 0.3932039141654968, |
| "learning_rate": 6.189517924408485e-06, |
| "loss": 1.1567, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.042721449727884206, |
| "grad_norm": 0.34061533212661743, |
| "learning_rate": 6.185039295359235e-06, |
| "loss": 0.4984, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.04275063104600435, |
| "grad_norm": 0.3698158264160156, |
| "learning_rate": 6.180559658709893e-06, |
| "loss": 0.6595, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.04277981236412449, |
| "grad_norm": 0.3264634609222412, |
| "learning_rate": 6.176079018269345e-06, |
| "loss": 0.4444, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.042808993682244625, |
| "grad_norm": 0.33824777603149414, |
| "learning_rate": 6.171597377847335e-06, |
| "loss": 0.4791, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.04283817500036477, |
| "grad_norm": 0.3647175431251526, |
| "learning_rate": 6.167114741254452e-06, |
| "loss": 0.6941, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.04286735631848491, |
| "grad_norm": 0.3616830110549927, |
| "learning_rate": 6.162631112302138e-06, |
| "loss": 1.0132, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.042896537636605045, |
| "grad_norm": 0.4097737669944763, |
| "learning_rate": 6.1581464948026745e-06, |
| "loss": 1.0722, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.04292571895472518, |
| "grad_norm": 0.3173997402191162, |
| "learning_rate": 6.153660892569184e-06, |
| "loss": 0.327, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.04295490027284533, |
| "grad_norm": 0.36846423149108887, |
| "learning_rate": 6.14917430941563e-06, |
| "loss": 0.6397, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.042984081590965464, |
| "grad_norm": 0.38134676218032837, |
| "learning_rate": 6.144686749156803e-06, |
| "loss": 0.6632, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.0430132629090856, |
| "grad_norm": 0.38962602615356445, |
| "learning_rate": 6.140198215608333e-06, |
| "loss": 0.6959, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.04304244422720574, |
| "grad_norm": 0.38198548555374146, |
| "learning_rate": 6.13570871258667e-06, |
| "loss": 0.9519, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.043071625545325884, |
| "grad_norm": 0.3785431385040283, |
| "learning_rate": 6.131218243909092e-06, |
| "loss": 0.5735, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.04310080686344602, |
| "grad_norm": 0.3394271433353424, |
| "learning_rate": 6.126726813393698e-06, |
| "loss": 0.5071, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.04312998818156616, |
| "grad_norm": 0.37844836711883545, |
| "learning_rate": 6.122234424859404e-06, |
| "loss": 0.7508, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.0431591694996863, |
| "grad_norm": 0.35576146841049194, |
| "learning_rate": 6.117741082125939e-06, |
| "loss": 0.9045, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.04318835081780644, |
| "grad_norm": 0.3391680419445038, |
| "learning_rate": 6.113246789013849e-06, |
| "loss": 0.5658, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.04321753213592658, |
| "grad_norm": 0.3584500849246979, |
| "learning_rate": 6.108751549344481e-06, |
| "loss": 0.6301, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.043246713454046716, |
| "grad_norm": 0.3853852152824402, |
| "learning_rate": 6.10425536693999e-06, |
| "loss": 1.3742, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.04327589477216686, |
| "grad_norm": 0.42832931876182556, |
| "learning_rate": 6.0997582456233354e-06, |
| "loss": 0.5697, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.043305076090287, |
| "grad_norm": 0.39990997314453125, |
| "learning_rate": 6.09526018921827e-06, |
| "loss": 0.6582, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.043334257408407136, |
| "grad_norm": 0.35249418020248413, |
| "learning_rate": 6.090761201549342e-06, |
| "loss": 0.5704, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.04336343872652728, |
| "grad_norm": 0.4183693826198578, |
| "learning_rate": 6.086261286441898e-06, |
| "loss": 1.6189, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.04339262004464742, |
| "grad_norm": 0.41847848892211914, |
| "learning_rate": 6.081760447722065e-06, |
| "loss": 1.0851, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.043421801362767555, |
| "grad_norm": 0.3046567142009735, |
| "learning_rate": 6.077258689216761e-06, |
| "loss": 0.5428, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.04345098268088769, |
| "grad_norm": 0.4517742693424225, |
| "learning_rate": 6.07275601475368e-06, |
| "loss": 1.3743, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.04348016399900784, |
| "grad_norm": 0.3410830795764923, |
| "learning_rate": 6.068252428161302e-06, |
| "loss": 0.5887, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.043509345317127975, |
| "grad_norm": 0.6816498041152954, |
| "learning_rate": 6.063747933268877e-06, |
| "loss": 0.5548, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.04353852663524811, |
| "grad_norm": 0.41065675020217896, |
| "learning_rate": 6.059242533906433e-06, |
| "loss": 0.6256, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.043567707953368257, |
| "grad_norm": 0.41196197271347046, |
| "learning_rate": 6.0547362339047585e-06, |
| "loss": 0.6855, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.043596889271488394, |
| "grad_norm": 0.3572843074798584, |
| "learning_rate": 6.050229037095418e-06, |
| "loss": 0.6138, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.04362607058960853, |
| "grad_norm": 0.3071393668651581, |
| "learning_rate": 6.045720947310728e-06, |
| "loss": 1.2278, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.04365525190772867, |
| "grad_norm": 0.36258363723754883, |
| "learning_rate": 6.041211968383773e-06, |
| "loss": 0.5287, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.043684433225848814, |
| "grad_norm": 0.39727750420570374, |
| "learning_rate": 6.036702104148391e-06, |
| "loss": 1.1, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.04371361454396895, |
| "grad_norm": 0.3095855414867401, |
| "learning_rate": 6.032191358439168e-06, |
| "loss": 0.8809, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.04374279586208909, |
| "grad_norm": 0.3498842120170593, |
| "learning_rate": 6.027679735091447e-06, |
| "loss": 0.4802, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.04377197718020923, |
| "grad_norm": 0.3247460722923279, |
| "learning_rate": 6.02316723794131e-06, |
| "loss": 0.4808, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.04380115849832937, |
| "grad_norm": 0.3547542095184326, |
| "learning_rate": 6.018653870825588e-06, |
| "loss": 0.9811, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.04383033981644951, |
| "grad_norm": 0.34135520458221436, |
| "learning_rate": 6.014139637581851e-06, |
| "loss": 0.6253, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.043859521134569646, |
| "grad_norm": 0.37919068336486816, |
| "learning_rate": 6.009624542048398e-06, |
| "loss": 1.1585, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.04388870245268979, |
| "grad_norm": 0.4812668263912201, |
| "learning_rate": 6.005108588064271e-06, |
| "loss": 0.7266, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.04391788377080993, |
| "grad_norm": 0.33309879899024963, |
| "learning_rate": 6.000591779469239e-06, |
| "loss": 0.4967, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.043947065088930065, |
| "grad_norm": 0.33536651730537415, |
| "learning_rate": 5.996074120103793e-06, |
| "loss": 0.3774, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.04397624640705021, |
| "grad_norm": 0.38309988379478455, |
| "learning_rate": 5.991555613809151e-06, |
| "loss": 0.4866, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.04400542772517035, |
| "grad_norm": 0.3609645366668701, |
| "learning_rate": 5.987036264427253e-06, |
| "loss": 0.6536, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.044034609043290485, |
| "grad_norm": 0.3769771456718445, |
| "learning_rate": 5.982516075800753e-06, |
| "loss": 0.5912, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.04406379036141062, |
| "grad_norm": 0.3419504165649414, |
| "learning_rate": 5.977995051773018e-06, |
| "loss": 1.0061, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.04409297167953077, |
| "grad_norm": 0.35084712505340576, |
| "learning_rate": 5.973473196188128e-06, |
| "loss": 0.6606, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.044122152997650904, |
| "grad_norm": 0.3714430630207062, |
| "learning_rate": 5.968950512890868e-06, |
| "loss": 1.1298, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.04415133431577104, |
| "grad_norm": 0.30666446685791016, |
| "learning_rate": 5.964427005726728e-06, |
| "loss": 0.4282, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.044180515633891186, |
| "grad_norm": 0.4037674367427826, |
| "learning_rate": 5.959902678541898e-06, |
| "loss": 1.1066, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.044209696952011324, |
| "grad_norm": 0.36675557494163513, |
| "learning_rate": 5.955377535183264e-06, |
| "loss": 0.4291, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.04423887827013146, |
| "grad_norm": 0.386046826839447, |
| "learning_rate": 5.950851579498409e-06, |
| "loss": 0.7465, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.0442680595882516, |
| "grad_norm": 0.6406238079071045, |
| "learning_rate": 5.946324815335602e-06, |
| "loss": 1.0103, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.04429724090637174, |
| "grad_norm": 0.49938124418258667, |
| "learning_rate": 5.941797246543807e-06, |
| "loss": 1.2251, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.04432642222449188, |
| "grad_norm": 0.3883410096168518, |
| "learning_rate": 5.937268876972662e-06, |
| "loss": 1.2568, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.04435560354261202, |
| "grad_norm": 0.4102421700954437, |
| "learning_rate": 5.932739710472496e-06, |
| "loss": 0.9265, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.04438478486073216, |
| "grad_norm": 0.4254116415977478, |
| "learning_rate": 5.9282097508943095e-06, |
| "loss": 0.6451, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.0444139661788523, |
| "grad_norm": 0.4661619961261749, |
| "learning_rate": 5.923679002089775e-06, |
| "loss": 0.5876, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.04444314749697244, |
| "grad_norm": 0.33193913102149963, |
| "learning_rate": 5.919147467911245e-06, |
| "loss": 0.6648, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.044472328815092575, |
| "grad_norm": 0.3980958163738251, |
| "learning_rate": 5.914615152211732e-06, |
| "loss": 0.6218, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.04450151013321272, |
| "grad_norm": 0.362708181142807, |
| "learning_rate": 5.910082058844916e-06, |
| "loss": 0.6168, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.04453069145133286, |
| "grad_norm": 0.3526471257209778, |
| "learning_rate": 5.905548191665137e-06, |
| "loss": 0.5615, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.044559872769452995, |
| "grad_norm": 0.3409159481525421, |
| "learning_rate": 5.901013554527396e-06, |
| "loss": 0.4506, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.04458905408757313, |
| "grad_norm": 0.30599796772003174, |
| "learning_rate": 5.8964781512873435e-06, |
| "loss": 1.06, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.04461823540569328, |
| "grad_norm": 0.4193313419818878, |
| "learning_rate": 5.891941985801287e-06, |
| "loss": 0.5946, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.044647416723813414, |
| "grad_norm": 0.32519033551216125, |
| "learning_rate": 5.887405061926178e-06, |
| "loss": 1.1463, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.04467659804193355, |
| "grad_norm": 0.3898588716983795, |
| "learning_rate": 5.882867383519614e-06, |
| "loss": 0.6789, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.044705779360053696, |
| "grad_norm": 0.35483554005622864, |
| "learning_rate": 5.8783289544398345e-06, |
| "loss": 0.8158, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.044734960678173834, |
| "grad_norm": 0.46464627981185913, |
| "learning_rate": 5.8737897785457175e-06, |
| "loss": 0.8837, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.04476414199629397, |
| "grad_norm": 0.49228841066360474, |
| "learning_rate": 5.8692498596967754e-06, |
| "loss": 0.8265, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.04479332331441411, |
| "grad_norm": 0.39155879616737366, |
| "learning_rate": 5.8647092017531535e-06, |
| "loss": 0.5222, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.04482250463253425, |
| "grad_norm": 0.3693219721317291, |
| "learning_rate": 5.860167808575622e-06, |
| "loss": 0.3615, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.04485168595065439, |
| "grad_norm": 0.39265987277030945, |
| "learning_rate": 5.855625684025581e-06, |
| "loss": 0.4919, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.04488086726877453, |
| "grad_norm": 0.4103962779045105, |
| "learning_rate": 5.851082831965049e-06, |
| "loss": 0.5275, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.04491004858689467, |
| "grad_norm": 0.35716426372528076, |
| "learning_rate": 5.8465392562566645e-06, |
| "loss": 0.4833, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.04493922990501481, |
| "grad_norm": 0.31867843866348267, |
| "learning_rate": 5.841994960763682e-06, |
| "loss": 0.4084, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.04496841122313495, |
| "grad_norm": 0.31158435344696045, |
| "learning_rate": 5.837449949349966e-06, |
| "loss": 0.3795, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.044997592541255085, |
| "grad_norm": 0.32950979471206665, |
| "learning_rate": 5.832904225879992e-06, |
| "loss": 1.1327, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.04502677385937523, |
| "grad_norm": 0.42442449927330017, |
| "learning_rate": 5.828357794218838e-06, |
| "loss": 1.0674, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.04505595517749537, |
| "grad_norm": 0.35462966561317444, |
| "learning_rate": 5.823810658232187e-06, |
| "loss": 1.0503, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.045085136495615505, |
| "grad_norm": 0.3628358840942383, |
| "learning_rate": 5.81926282178632e-06, |
| "loss": 1.0449, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.04511431781373565, |
| "grad_norm": 0.4045146703720093, |
| "learning_rate": 5.814714288748112e-06, |
| "loss": 2.2095, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.04514349913185579, |
| "grad_norm": 0.6266494989395142, |
| "learning_rate": 5.810165062985034e-06, |
| "loss": 0.6161, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.045172680449975924, |
| "grad_norm": 0.28228336572647095, |
| "learning_rate": 5.805615148365143e-06, |
| "loss": 1.0373, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.04520186176809606, |
| "grad_norm": 0.3559330403804779, |
| "learning_rate": 5.8010645487570806e-06, |
| "loss": 0.4801, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.045231043086216206, |
| "grad_norm": 0.3778706192970276, |
| "learning_rate": 5.796513268030072e-06, |
| "loss": 0.6412, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.045260224404336344, |
| "grad_norm": 0.3575391173362732, |
| "learning_rate": 5.791961310053927e-06, |
| "loss": 0.6181, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.04528940572245648, |
| "grad_norm": 0.3576406240463257, |
| "learning_rate": 5.78740867869902e-06, |
| "loss": 0.5147, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.045318587040576626, |
| "grad_norm": 0.31249988079071045, |
| "learning_rate": 5.782855377836308e-06, |
| "loss": 0.4694, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.04534776835869676, |
| "grad_norm": 0.36384817957878113, |
| "learning_rate": 5.778301411337315e-06, |
| "loss": 0.6714, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.0453769496768169, |
| "grad_norm": 0.5146538019180298, |
| "learning_rate": 5.773746783074127e-06, |
| "loss": 1.362, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.04540613099493704, |
| "grad_norm": 0.40540173649787903, |
| "learning_rate": 5.769191496919393e-06, |
| "loss": 1.024, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.04543531231305718, |
| "grad_norm": 0.3848044276237488, |
| "learning_rate": 5.76463555674633e-06, |
| "loss": 0.7109, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.04546449363117732, |
| "grad_norm": 0.3846138119697571, |
| "learning_rate": 5.7600789664287e-06, |
| "loss": 0.5909, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.04549367494929746, |
| "grad_norm": 0.42906057834625244, |
| "learning_rate": 5.7555217298408205e-06, |
| "loss": 0.8335, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.0455228562674176, |
| "grad_norm": 0.3721025586128235, |
| "learning_rate": 5.750963850857567e-06, |
| "loss": 0.627, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.04555203758553774, |
| "grad_norm": 0.32425162196159363, |
| "learning_rate": 5.74640533335435e-06, |
| "loss": 0.5512, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.04558121890365788, |
| "grad_norm": 0.3557809591293335, |
| "learning_rate": 5.741846181207128e-06, |
| "loss": 0.5865, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.045610400221778015, |
| "grad_norm": 0.6053258776664734, |
| "learning_rate": 5.7372863982924e-06, |
| "loss": 0.7147, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.04563958153989816, |
| "grad_norm": 0.37965813279151917, |
| "learning_rate": 5.7327259884872e-06, |
| "loss": 0.6593, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.0456687628580183, |
| "grad_norm": 0.4201296269893646, |
| "learning_rate": 5.728164955669095e-06, |
| "loss": 0.6004, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.045697944176138434, |
| "grad_norm": 0.42941543459892273, |
| "learning_rate": 5.7236033037161794e-06, |
| "loss": 0.8987, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.04572712549425858, |
| "grad_norm": 0.3951410949230194, |
| "learning_rate": 5.7190410365070805e-06, |
| "loss": 1.0398, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.045756306812378716, |
| "grad_norm": 0.45085379481315613, |
| "learning_rate": 5.714478157920942e-06, |
| "loss": 0.618, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.045785488130498854, |
| "grad_norm": 0.37628981471061707, |
| "learning_rate": 5.70991467183743e-06, |
| "loss": 0.4829, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.04581466944861899, |
| "grad_norm": 0.4668557941913605, |
| "learning_rate": 5.705350582136728e-06, |
| "loss": 0.6996, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.045843850766739136, |
| "grad_norm": 0.37691569328308105, |
| "learning_rate": 5.700785892699532e-06, |
| "loss": 0.5876, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.045873032084859273, |
| "grad_norm": 0.36309245228767395, |
| "learning_rate": 5.696220607407048e-06, |
| "loss": 1.1738, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.04590221340297941, |
| "grad_norm": 0.34443017840385437, |
| "learning_rate": 5.6916547301409894e-06, |
| "loss": 0.5748, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.045931394721099555, |
| "grad_norm": 0.31452393531799316, |
| "learning_rate": 5.687088264783569e-06, |
| "loss": 0.6054, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.04596057603921969, |
| "grad_norm": 0.40136411786079407, |
| "learning_rate": 5.682521215217504e-06, |
| "loss": 0.5726, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.04598975735733983, |
| "grad_norm": 0.3564138412475586, |
| "learning_rate": 5.677953585326011e-06, |
| "loss": 0.5445, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.04601893867545997, |
| "grad_norm": 0.3805476427078247, |
| "learning_rate": 5.67338537899279e-06, |
| "loss": 0.7026, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.04604811999358011, |
| "grad_norm": 0.4456822872161865, |
| "learning_rate": 5.66881660010204e-06, |
| "loss": 0.8621, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.04607730131170025, |
| "grad_norm": 0.41693344712257385, |
| "learning_rate": 5.664247252538448e-06, |
| "loss": 1.0088, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.04610648262982039, |
| "grad_norm": 0.3166910707950592, |
| "learning_rate": 5.659677340187173e-06, |
| "loss": 0.9629, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.046135663947940525, |
| "grad_norm": 0.3747338056564331, |
| "learning_rate": 5.655106866933865e-06, |
| "loss": 0.6892, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.04616484526606067, |
| "grad_norm": 0.43581539392471313, |
| "learning_rate": 5.650535836664649e-06, |
| "loss": 0.9378, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.04619402658418081, |
| "grad_norm": 0.3354921042919159, |
| "learning_rate": 5.6459642532661204e-06, |
| "loss": 1.076, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.046223207902300945, |
| "grad_norm": 0.3701512813568115, |
| "learning_rate": 5.6413921206253484e-06, |
| "loss": 0.7314, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.04625238922042109, |
| "grad_norm": 0.3477576673030853, |
| "learning_rate": 5.636819442629867e-06, |
| "loss": 1.1249, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.04628157053854123, |
| "grad_norm": 0.4196307957172394, |
| "learning_rate": 5.632246223167674e-06, |
| "loss": 1.3362, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.046310751856661364, |
| "grad_norm": 0.39420628547668457, |
| "learning_rate": 5.62767246612723e-06, |
| "loss": 0.7578, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.0463399331747815, |
| "grad_norm": 0.3676977753639221, |
| "learning_rate": 5.623098175397448e-06, |
| "loss": 1.4606, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.046369114492901646, |
| "grad_norm": 0.35821962356567383, |
| "learning_rate": 5.618523354867698e-06, |
| "loss": 0.5785, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.046398295811021784, |
| "grad_norm": 0.436478853225708, |
| "learning_rate": 5.613948008427803e-06, |
| "loss": 0.7407, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.04642747712914192, |
| "grad_norm": 0.3770596385002136, |
| "learning_rate": 5.609372139968028e-06, |
| "loss": 0.4835, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.046456658447262066, |
| "grad_norm": 0.42741721868515015, |
| "learning_rate": 5.604795753379084e-06, |
| "loss": 1.7176, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.0464858397653822, |
| "grad_norm": 0.42561039328575134, |
| "learning_rate": 5.600218852552121e-06, |
| "loss": 0.8387, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.04651502108350234, |
| "grad_norm": 0.34508511424064636, |
| "learning_rate": 5.595641441378731e-06, |
| "loss": 1.2232, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.04654420240162248, |
| "grad_norm": 0.4562961161136627, |
| "learning_rate": 5.591063523750936e-06, |
| "loss": 1.6838, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.04657338371974262, |
| "grad_norm": 0.36605069041252136, |
| "learning_rate": 5.586485103561187e-06, |
| "loss": 0.6669, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.04660256503786276, |
| "grad_norm": 0.40251708030700684, |
| "learning_rate": 5.581906184702367e-06, |
| "loss": 0.858, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.0466317463559829, |
| "grad_norm": 0.3805084228515625, |
| "learning_rate": 5.577326771067781e-06, |
| "loss": 0.7383, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.04666092767410304, |
| "grad_norm": 0.36825302243232727, |
| "learning_rate": 5.572746866551153e-06, |
| "loss": 1.4628, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.04669010899222318, |
| "grad_norm": 0.38981571793556213, |
| "learning_rate": 5.5681664750466266e-06, |
| "loss": 0.4684, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.04671929031034332, |
| "grad_norm": 0.3940315544605255, |
| "learning_rate": 5.56358560044876e-06, |
| "loss": 0.5657, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.046748471628463455, |
| "grad_norm": 0.3806256949901581, |
| "learning_rate": 5.559004246652516e-06, |
| "loss": 1.1082, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.0467776529465836, |
| "grad_norm": 0.49886786937713623, |
| "learning_rate": 5.554422417553276e-06, |
| "loss": 1.0887, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.04680683426470374, |
| "grad_norm": 0.46757110953330994, |
| "learning_rate": 5.549840117046814e-06, |
| "loss": 1.3562, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.046836015582823874, |
| "grad_norm": 0.40220311284065247, |
| "learning_rate": 5.545257349029313e-06, |
| "loss": 1.0773, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.04686519690094402, |
| "grad_norm": 0.35627269744873047, |
| "learning_rate": 5.54067411739735e-06, |
| "loss": 0.5211, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.046894378219064156, |
| "grad_norm": 0.4067480266094208, |
| "learning_rate": 5.5360904260478955e-06, |
| "loss": 0.5509, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.046923559537184294, |
| "grad_norm": 0.34013980627059937, |
| "learning_rate": 5.5315062788783135e-06, |
| "loss": 0.5864, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.04695274085530443, |
| "grad_norm": 0.3505479395389557, |
| "learning_rate": 5.526921679786353e-06, |
| "loss": 1.3651, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.046981922173424576, |
| "grad_norm": 0.35992175340652466, |
| "learning_rate": 5.52233663267015e-06, |
| "loss": 0.4685, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.04701110349154471, |
| "grad_norm": 0.3556915819644928, |
| "learning_rate": 5.517751141428218e-06, |
| "loss": 0.4695, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.04704028480966485, |
| "grad_norm": 0.3741947114467621, |
| "learning_rate": 5.513165209959452e-06, |
| "loss": 1.1398, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.047069466127784995, |
| "grad_norm": 0.34903985261917114, |
| "learning_rate": 5.508578842163117e-06, |
| "loss": 0.534, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.04709864744590513, |
| "grad_norm": 0.43442007899284363, |
| "learning_rate": 5.503992041938853e-06, |
| "loss": 0.8144, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.04712782876402527, |
| "grad_norm": 0.44650501012802124, |
| "learning_rate": 5.499404813186666e-06, |
| "loss": 0.978, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.04715701008214541, |
| "grad_norm": 0.3673059940338135, |
| "learning_rate": 5.4948171598069255e-06, |
| "loss": 0.5166, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.04718619140026555, |
| "grad_norm": 0.363301545381546, |
| "learning_rate": 5.490229085700362e-06, |
| "loss": 0.8028, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.04721537271838569, |
| "grad_norm": 0.386004239320755, |
| "learning_rate": 5.485640594768068e-06, |
| "loss": 1.0857, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.04724455403650583, |
| "grad_norm": 0.3484787046909332, |
| "learning_rate": 5.481051690911484e-06, |
| "loss": 1.0368, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.04727373535462597, |
| "grad_norm": 0.4113389253616333, |
| "learning_rate": 5.4764623780324055e-06, |
| "loss": 0.9225, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.04730291667274611, |
| "grad_norm": 0.4460744261741638, |
| "learning_rate": 5.471872660032974e-06, |
| "loss": 1.7242, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.04733209799086625, |
| "grad_norm": 0.32895615696907043, |
| "learning_rate": 5.467282540815678e-06, |
| "loss": 0.4936, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.047361279308986384, |
| "grad_norm": 0.4129882752895355, |
| "learning_rate": 5.462692024283346e-06, |
| "loss": 1.5617, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.04739046062710653, |
| "grad_norm": 0.413620263338089, |
| "learning_rate": 5.458101114339141e-06, |
| "loss": 1.1157, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.047419641945226666, |
| "grad_norm": 0.34934747219085693, |
| "learning_rate": 5.453509814886566e-06, |
| "loss": 0.598, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.047448823263346804, |
| "grad_norm": 0.44166824221611023, |
| "learning_rate": 5.44891812982945e-06, |
| "loss": 0.9538, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.04747800458146695, |
| "grad_norm": 0.35320553183555603, |
| "learning_rate": 5.4443260630719545e-06, |
| "loss": 0.4575, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.047507185899587086, |
| "grad_norm": 0.3392915427684784, |
| "learning_rate": 5.439733618518563e-06, |
| "loss": 1.1235, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.04753636721770722, |
| "grad_norm": 0.30021265149116516, |
| "learning_rate": 5.435140800074081e-06, |
| "loss": 0.6758, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.04756554853582736, |
| "grad_norm": 0.6875030994415283, |
| "learning_rate": 5.43054761164363e-06, |
| "loss": 0.5988, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.047594729853947505, |
| "grad_norm": 0.34318453073501587, |
| "learning_rate": 5.4259540571326495e-06, |
| "loss": 0.6302, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.04762391117206764, |
| "grad_norm": 0.5387415289878845, |
| "learning_rate": 5.421360140446887e-06, |
| "loss": 1.1102, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.04765309249018778, |
| "grad_norm": 0.3711667060852051, |
| "learning_rate": 5.4167658654923994e-06, |
| "loss": 0.716, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.04768227380830792, |
| "grad_norm": 0.3770090639591217, |
| "learning_rate": 5.41217123617555e-06, |
| "loss": 0.6143, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.04771145512642806, |
| "grad_norm": 0.3346192538738251, |
| "learning_rate": 5.407576256403e-06, |
| "loss": 0.5549, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.0477406364445482, |
| "grad_norm": 0.2995697855949402, |
| "learning_rate": 5.4029809300817106e-06, |
| "loss": 0.4701, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.04776981776266834, |
| "grad_norm": 0.5886590480804443, |
| "learning_rate": 5.398385261118937e-06, |
| "loss": 0.5014, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.04779899908078848, |
| "grad_norm": 0.35009685158729553, |
| "learning_rate": 5.393789253422225e-06, |
| "loss": 1.0042, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.04782818039890862, |
| "grad_norm": 0.39260685443878174, |
| "learning_rate": 5.3891929108994125e-06, |
| "loss": 0.5922, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.04785736171702876, |
| "grad_norm": 0.39857110381126404, |
| "learning_rate": 5.3845962374586165e-06, |
| "loss": 0.5686, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.047886543035148894, |
| "grad_norm": 0.31844672560691833, |
| "learning_rate": 5.379999237008238e-06, |
| "loss": 0.5281, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.04791572435326904, |
| "grad_norm": 0.37254342436790466, |
| "learning_rate": 5.375401913456957e-06, |
| "loss": 0.5094, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.047944905671389176, |
| "grad_norm": 0.323326975107193, |
| "learning_rate": 5.370804270713725e-06, |
| "loss": 0.4384, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.047974086989509314, |
| "grad_norm": 0.3509173095226288, |
| "learning_rate": 5.366206312687769e-06, |
| "loss": 0.4761, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.04800326830762946, |
| "grad_norm": 0.3705430328845978, |
| "learning_rate": 5.361608043288582e-06, |
| "loss": 1.7497, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.048032449625749596, |
| "grad_norm": 0.36233147978782654, |
| "learning_rate": 5.357009466425921e-06, |
| "loss": 0.5567, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.04806163094386973, |
| "grad_norm": 0.4284593462944031, |
| "learning_rate": 5.352410586009806e-06, |
| "loss": 0.6568, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.04809081226198987, |
| "grad_norm": 0.4066416919231415, |
| "learning_rate": 5.347811405950513e-06, |
| "loss": 0.657, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.048119993580110015, |
| "grad_norm": 0.47491782903671265, |
| "learning_rate": 5.343211930158576e-06, |
| "loss": 1.0728, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.04814917489823015, |
| "grad_norm": 0.3975834846496582, |
| "learning_rate": 5.3386121625447784e-06, |
| "loss": 0.6484, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.04817835621635029, |
| "grad_norm": 0.39328670501708984, |
| "learning_rate": 5.33401210702015e-06, |
| "loss": 0.655, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.048207537534470435, |
| "grad_norm": 0.3949771225452423, |
| "learning_rate": 5.329411767495968e-06, |
| "loss": 0.6678, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.04823671885259057, |
| "grad_norm": 0.3278443515300751, |
| "learning_rate": 5.324811147883753e-06, |
| "loss": 1.0587, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.04826590017071071, |
| "grad_norm": 0.4281160533428192, |
| "learning_rate": 5.320210252095257e-06, |
| "loss": 1.1364, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.04829508148883085, |
| "grad_norm": 0.39665237069129944, |
| "learning_rate": 5.315609084042474e-06, |
| "loss": 1.0365, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.04832426280695099, |
| "grad_norm": 0.3179062306880951, |
| "learning_rate": 5.311007647637626e-06, |
| "loss": 0.4506, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.04835344412507113, |
| "grad_norm": 0.3577589690685272, |
| "learning_rate": 5.306405946793162e-06, |
| "loss": 1.6702, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.04838262544319127, |
| "grad_norm": 0.4181123375892639, |
| "learning_rate": 5.30180398542176e-06, |
| "loss": 0.5226, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.04841180676131141, |
| "grad_norm": 0.4699373245239258, |
| "learning_rate": 5.297201767436315e-06, |
| "loss": 1.7044, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.04844098807943155, |
| "grad_norm": 0.36552685499191284, |
| "learning_rate": 5.2925992967499426e-06, |
| "loss": 0.5992, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.048470169397551686, |
| "grad_norm": 0.38431382179260254, |
| "learning_rate": 5.287996577275973e-06, |
| "loss": 1.0892, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.048499350715671824, |
| "grad_norm": 0.3768801987171173, |
| "learning_rate": 5.283393612927949e-06, |
| "loss": 0.5428, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.04852853203379197, |
| "grad_norm": 0.4842134416103363, |
| "learning_rate": 5.278790407619618e-06, |
| "loss": 1.159, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.048557713351912106, |
| "grad_norm": 0.35083457827568054, |
| "learning_rate": 5.274186965264937e-06, |
| "loss": 0.7239, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.048586894670032243, |
| "grad_norm": 0.4802244305610657, |
| "learning_rate": 5.269583289778061e-06, |
| "loss": 1.0813, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.04861607598815239, |
| "grad_norm": 0.2682845890522003, |
| "learning_rate": 5.264979385073345e-06, |
| "loss": 0.6652, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.048645257306272525, |
| "grad_norm": 0.4198437035083771, |
| "learning_rate": 5.260375255065338e-06, |
| "loss": 1.227, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.04867443862439266, |
| "grad_norm": 0.389621764421463, |
| "learning_rate": 5.25577090366878e-06, |
| "loss": 0.532, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.0487036199425128, |
| "grad_norm": 0.34313276410102844, |
| "learning_rate": 5.251166334798603e-06, |
| "loss": 1.0799, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.048732801260632945, |
| "grad_norm": 0.39682772755622864, |
| "learning_rate": 5.246561552369916e-06, |
| "loss": 0.6497, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.04876198257875308, |
| "grad_norm": 0.39475715160369873, |
| "learning_rate": 5.241956560298021e-06, |
| "loss": 0.4502, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.04879116389687322, |
| "grad_norm": 0.46035075187683105, |
| "learning_rate": 5.237351362498389e-06, |
| "loss": 1.0513, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.048820345214993364, |
| "grad_norm": 0.40602508187294006, |
| "learning_rate": 5.232745962886666e-06, |
| "loss": 0.5903, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.0488495265331135, |
| "grad_norm": 0.33140790462493896, |
| "learning_rate": 5.228140365378677e-06, |
| "loss": 0.5869, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.04887870785123364, |
| "grad_norm": 0.346050888299942, |
| "learning_rate": 5.223534573890409e-06, |
| "loss": 0.4712, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.04890788916935378, |
| "grad_norm": 0.40813395380973816, |
| "learning_rate": 5.218928592338013e-06, |
| "loss": 0.6421, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.04893707048747392, |
| "grad_norm": 0.40515461564064026, |
| "learning_rate": 5.214322424637808e-06, |
| "loss": 0.51, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.04896625180559406, |
| "grad_norm": 0.4099877178668976, |
| "learning_rate": 5.209716074706267e-06, |
| "loss": 0.8879, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.0489954331237142, |
| "grad_norm": 0.32295849919319153, |
| "learning_rate": 5.205109546460015e-06, |
| "loss": 0.5029, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.04902461444183434, |
| "grad_norm": 0.5721728801727295, |
| "learning_rate": 5.200502843815837e-06, |
| "loss": 1.1584, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.04905379575995448, |
| "grad_norm": 0.44620224833488464, |
| "learning_rate": 5.195895970690659e-06, |
| "loss": 1.4668, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.049082977078074616, |
| "grad_norm": 0.39212411642074585, |
| "learning_rate": 5.191288931001554e-06, |
| "loss": 1.6099, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.049112158396194754, |
| "grad_norm": 0.36707285046577454, |
| "learning_rate": 5.186681728665737e-06, |
| "loss": 0.4921, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.0491413397143149, |
| "grad_norm": 0.3235447108745575, |
| "learning_rate": 5.1820743676005605e-06, |
| "loss": 0.5744, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.049170521032435036, |
| "grad_norm": 0.44411349296569824, |
| "learning_rate": 5.177466851723514e-06, |
| "loss": 0.8835, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.04919970235055517, |
| "grad_norm": 0.38251692056655884, |
| "learning_rate": 5.172859184952217e-06, |
| "loss": 0.7027, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.04922888366867531, |
| "grad_norm": 0.5228366255760193, |
| "learning_rate": 5.168251371204418e-06, |
| "loss": 0.546, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.049258064986795455, |
| "grad_norm": 0.3755437135696411, |
| "learning_rate": 5.16364341439799e-06, |
| "loss": 1.6731, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.04928724630491559, |
| "grad_norm": 0.4701140820980072, |
| "learning_rate": 5.159035318450927e-06, |
| "loss": 0.6631, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.04931642762303573, |
| "grad_norm": 0.5090385675430298, |
| "learning_rate": 5.154427087281342e-06, |
| "loss": 0.5883, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.049345608941155875, |
| "grad_norm": 0.40497612953186035, |
| "learning_rate": 5.149818724807463e-06, |
| "loss": 0.5393, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.04937479025927601, |
| "grad_norm": 0.38610902428627014, |
| "learning_rate": 5.145210234947631e-06, |
| "loss": 0.7361, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.04940397157739615, |
| "grad_norm": 0.41340652108192444, |
| "learning_rate": 5.140601621620293e-06, |
| "loss": 0.7347, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.04943315289551629, |
| "grad_norm": 0.3586626946926117, |
| "learning_rate": 5.135992888744002e-06, |
| "loss": 0.5494, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.04946233421363643, |
| "grad_norm": 0.33416488766670227, |
| "learning_rate": 5.13138404023741e-06, |
| "loss": 0.8376, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.04949151553175657, |
| "grad_norm": 0.3049091696739197, |
| "learning_rate": 5.126775080019275e-06, |
| "loss": 0.4129, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.04952069684987671, |
| "grad_norm": 0.3171428442001343, |
| "learning_rate": 5.122166012008444e-06, |
| "loss": 0.4096, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.04954987816799685, |
| "grad_norm": 0.4023645520210266, |
| "learning_rate": 5.117556840123851e-06, |
| "loss": 0.678, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.04957905948611699, |
| "grad_norm": 0.3831484019756317, |
| "learning_rate": 5.112947568284531e-06, |
| "loss": 0.5166, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.049608240804237126, |
| "grad_norm": 0.3679618835449219, |
| "learning_rate": 5.108338200409592e-06, |
| "loss": 0.6442, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.049637422122357264, |
| "grad_norm": 0.3741089701652527, |
| "learning_rate": 5.103728740418229e-06, |
| "loss": 0.5331, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.04966660344047741, |
| "grad_norm": 0.5156499147415161, |
| "learning_rate": 5.099119192229719e-06, |
| "loss": 1.361, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.049695784758597546, |
| "grad_norm": 0.3307226598262787, |
| "learning_rate": 5.094509559763404e-06, |
| "loss": 1.1373, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.04972496607671768, |
| "grad_norm": 0.642590343952179, |
| "learning_rate": 5.089899846938707e-06, |
| "loss": 0.9344, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.04975414739483783, |
| "grad_norm": 0.4349033832550049, |
| "learning_rate": 5.085290057675117e-06, |
| "loss": 1.2254, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.049783328712957965, |
| "grad_norm": 0.38902968168258667, |
| "learning_rate": 5.080680195892183e-06, |
| "loss": 1.4141, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.0498125100310781, |
| "grad_norm": 0.4432470500469208, |
| "learning_rate": 5.07607026550952e-06, |
| "loss": 0.5056, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.04984169134919824, |
| "grad_norm": 0.36213016510009766, |
| "learning_rate": 5.071460270446805e-06, |
| "loss": 0.5099, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.049870872667318385, |
| "grad_norm": 0.3571278750896454, |
| "learning_rate": 5.066850214623762e-06, |
| "loss": 0.564, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.04990005398543852, |
| "grad_norm": 0.4040015935897827, |
| "learning_rate": 5.06224010196017e-06, |
| "loss": 0.6354, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.04992923530355866, |
| "grad_norm": 0.3847454786300659, |
| "learning_rate": 5.0576299363758605e-06, |
| "loss": 1.1227, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.049958416621678804, |
| "grad_norm": 0.6794761419296265, |
| "learning_rate": 5.053019721790703e-06, |
| "loss": 0.686, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.04998759793979894, |
| "grad_norm": 0.44063642621040344, |
| "learning_rate": 5.0484094621246126e-06, |
| "loss": 1.2616, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.05001677925791908, |
| "grad_norm": 0.5193138718605042, |
| "learning_rate": 5.043799161297542e-06, |
| "loss": 0.5531, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.05004596057603922, |
| "grad_norm": 0.33178043365478516, |
| "learning_rate": 5.03918882322948e-06, |
| "loss": 0.519, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.05007514189415936, |
| "grad_norm": 0.36429113149642944, |
| "learning_rate": 5.034578451840445e-06, |
| "loss": 0.6239, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.0501043232122795, |
| "grad_norm": 0.3573700785636902, |
| "learning_rate": 5.029968051050485e-06, |
| "loss": 0.6005, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.050133504530399636, |
| "grad_norm": 0.397399365901947, |
| "learning_rate": 5.025357624779673e-06, |
| "loss": 1.0494, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.05016268584851978, |
| "grad_norm": 0.3663575053215027, |
| "learning_rate": 5.020747176948103e-06, |
| "loss": 0.7401, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.05019186716663992, |
| "grad_norm": 0.44454318284988403, |
| "learning_rate": 5.0161367114758885e-06, |
| "loss": 0.8683, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.050221048484760056, |
| "grad_norm": 0.379833847284317, |
| "learning_rate": 5.0115262322831585e-06, |
| "loss": 0.5644, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.05025022980288019, |
| "grad_norm": 0.9493775367736816, |
| "learning_rate": 5.006915743290048e-06, |
| "loss": 0.8746, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.05027941112100034, |
| "grad_norm": 0.360813707113266, |
| "learning_rate": 5.002305248416709e-06, |
| "loss": 0.3802, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.050308592439120475, |
| "grad_norm": 0.359452486038208, |
| "learning_rate": 4.997694751583293e-06, |
| "loss": 0.5995, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.05033777375724061, |
| "grad_norm": 0.34378495812416077, |
| "learning_rate": 4.9930842567099535e-06, |
| "loss": 0.5416, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.05036695507536076, |
| "grad_norm": 0.4232017993927002, |
| "learning_rate": 4.988473767716843e-06, |
| "loss": 0.9639, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.050396136393480895, |
| "grad_norm": 0.37494608759880066, |
| "learning_rate": 4.983863288524112e-06, |
| "loss": 1.0635, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.05042531771160103, |
| "grad_norm": 0.43267616629600525, |
| "learning_rate": 4.979252823051898e-06, |
| "loss": 0.8451, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.05045449902972117, |
| "grad_norm": 0.4136841297149658, |
| "learning_rate": 4.9746423752203275e-06, |
| "loss": 1.1121, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.050483680347841314, |
| "grad_norm": 0.3568452298641205, |
| "learning_rate": 4.9700319489495164e-06, |
| "loss": 0.6266, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.05051286166596145, |
| "grad_norm": 0.45628127455711365, |
| "learning_rate": 4.965421548159556e-06, |
| "loss": 0.9397, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.05054204298408159, |
| "grad_norm": 0.28650036454200745, |
| "learning_rate": 4.96081117677052e-06, |
| "loss": 0.2575, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.050571224302201734, |
| "grad_norm": 0.37273702025413513, |
| "learning_rate": 4.956200838702459e-06, |
| "loss": 1.7468, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.05060040562032187, |
| "grad_norm": 0.9242348074913025, |
| "learning_rate": 4.951590537875389e-06, |
| "loss": 1.668, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.05062958693844201, |
| "grad_norm": 0.381517618894577, |
| "learning_rate": 4.946980278209298e-06, |
| "loss": 0.4889, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.050658768256562146, |
| "grad_norm": 0.35031402111053467, |
| "learning_rate": 4.94237006362414e-06, |
| "loss": 0.4679, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.05068794957468229, |
| "grad_norm": 0.3489398658275604, |
| "learning_rate": 4.937759898039831e-06, |
| "loss": 0.5752, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.05071713089280243, |
| "grad_norm": 0.3177669942378998, |
| "learning_rate": 4.933149785376238e-06, |
| "loss": 0.4948, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.050746312210922566, |
| "grad_norm": 0.49790051579475403, |
| "learning_rate": 4.928539729553196e-06, |
| "loss": 1.4488, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.0507754935290427, |
| "grad_norm": 0.43988800048828125, |
| "learning_rate": 4.923929734490481e-06, |
| "loss": 0.9619, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.05080467484716285, |
| "grad_norm": 0.41190025210380554, |
| "learning_rate": 4.919319804107819e-06, |
| "loss": 1.1826, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.050833856165282985, |
| "grad_norm": 0.39425376057624817, |
| "learning_rate": 4.914709942324885e-06, |
| "loss": 1.5996, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.05086303748340312, |
| "grad_norm": 0.3550109267234802, |
| "learning_rate": 4.9101001530612945e-06, |
| "loss": 1.2402, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.05089221880152327, |
| "grad_norm": 0.3233683109283447, |
| "learning_rate": 4.9054904402365966e-06, |
| "loss": 1.3174, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.050921400119643405, |
| "grad_norm": 0.5113329291343689, |
| "learning_rate": 4.900880807770283e-06, |
| "loss": 1.1184, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.05095058143776354, |
| "grad_norm": 0.3777258098125458, |
| "learning_rate": 4.896271259581773e-06, |
| "loss": 0.5711, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.05097976275588368, |
| "grad_norm": 0.4085157513618469, |
| "learning_rate": 4.89166179959041e-06, |
| "loss": 1.0628, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.051008944074003824, |
| "grad_norm": 0.3713407516479492, |
| "learning_rate": 4.88705243171547e-06, |
| "loss": 1.0235, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.05103812539212396, |
| "grad_norm": 0.41610291600227356, |
| "learning_rate": 4.88244315987615e-06, |
| "loss": 0.8116, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.0510673067102441, |
| "grad_norm": 0.4267372190952301, |
| "learning_rate": 4.8778339879915595e-06, |
| "loss": 1.2599, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.051096488028364244, |
| "grad_norm": 0.3548785448074341, |
| "learning_rate": 4.873224919980725e-06, |
| "loss": 0.5179, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.05112566934648438, |
| "grad_norm": 0.36194413900375366, |
| "learning_rate": 4.868615959762591e-06, |
| "loss": 0.5201, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.05115485066460452, |
| "grad_norm": 0.48503541946411133, |
| "learning_rate": 4.864007111256001e-06, |
| "loss": 1.1877, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.051184031982724656, |
| "grad_norm": 0.45998358726501465, |
| "learning_rate": 4.859398378379708e-06, |
| "loss": 0.7679, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.0512132133008448, |
| "grad_norm": 0.36723312735557556, |
| "learning_rate": 4.854789765052371e-06, |
| "loss": 0.5282, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.05124239461896494, |
| "grad_norm": 0.34161657094955444, |
| "learning_rate": 4.850181275192539e-06, |
| "loss": 1.1337, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.051271575937085076, |
| "grad_norm": 0.48422956466674805, |
| "learning_rate": 4.845572912718659e-06, |
| "loss": 0.7341, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.05130075725520522, |
| "grad_norm": 0.36561906337738037, |
| "learning_rate": 4.8409646815490756e-06, |
| "loss": 0.582, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.05132993857332536, |
| "grad_norm": 0.3676013648509979, |
| "learning_rate": 4.836356585602012e-06, |
| "loss": 1.0077, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.051359119891445495, |
| "grad_norm": 0.38218387961387634, |
| "learning_rate": 4.831748628795582e-06, |
| "loss": 0.5932, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.05138830120956563, |
| "grad_norm": 0.4253503084182739, |
| "learning_rate": 4.827140815047784e-06, |
| "loss": 0.8832, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.05141748252768578, |
| "grad_norm": 0.3717549443244934, |
| "learning_rate": 4.822533148276487e-06, |
| "loss": 1.0685, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.051446663845805915, |
| "grad_norm": 0.40669649839401245, |
| "learning_rate": 4.81792563239944e-06, |
| "loss": 0.6747, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.05147584516392605, |
| "grad_norm": 0.6465722322463989, |
| "learning_rate": 4.8133182713342655e-06, |
| "loss": 1.0716, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.0515050264820462, |
| "grad_norm": 0.3936142921447754, |
| "learning_rate": 4.808711068998448e-06, |
| "loss": 1.7433, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.051534207800166335, |
| "grad_norm": 0.37912002205848694, |
| "learning_rate": 4.804104029309344e-06, |
| "loss": 0.5185, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.05156338911828647, |
| "grad_norm": 0.392672598361969, |
| "learning_rate": 4.799497156184163e-06, |
| "loss": 0.708, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.05159257043640661, |
| "grad_norm": 0.3458792269229889, |
| "learning_rate": 4.794890453539986e-06, |
| "loss": 0.5662, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.051621751754526754, |
| "grad_norm": 0.4178561866283417, |
| "learning_rate": 4.7902839252937355e-06, |
| "loss": 0.4593, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.05165093307264689, |
| "grad_norm": 0.36535945534706116, |
| "learning_rate": 4.785677575362192e-06, |
| "loss": 0.575, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.05168011439076703, |
| "grad_norm": 0.4075562059879303, |
| "learning_rate": 4.781071407661989e-06, |
| "loss": 0.52, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.051709295708887174, |
| "grad_norm": 0.37340477108955383, |
| "learning_rate": 4.776465426109595e-06, |
| "loss": 0.5554, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.05173847702700731, |
| "grad_norm": 0.370836079120636, |
| "learning_rate": 4.771859634621324e-06, |
| "loss": 0.6705, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.05176765834512745, |
| "grad_norm": 0.3422700762748718, |
| "learning_rate": 4.767254037113336e-06, |
| "loss": 0.9884, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.051796839663247586, |
| "grad_norm": 0.35692858695983887, |
| "learning_rate": 4.762648637501614e-06, |
| "loss": 1.0467, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.05182602098136773, |
| "grad_norm": 0.38037583231925964, |
| "learning_rate": 4.758043439701979e-06, |
| "loss": 0.639, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.05185520229948787, |
| "grad_norm": 0.3402298092842102, |
| "learning_rate": 4.753438447630084e-06, |
| "loss": 0.6178, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.051884383617608006, |
| "grad_norm": 0.5744924545288086, |
| "learning_rate": 4.7488336652014e-06, |
| "loss": 0.4455, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.05191356493572815, |
| "grad_norm": 0.4713403284549713, |
| "learning_rate": 4.74422909633122e-06, |
| "loss": 1.1474, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.05194274625384829, |
| "grad_norm": 0.42325547337532043, |
| "learning_rate": 4.739624744934664e-06, |
| "loss": 1.0678, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.051971927571968425, |
| "grad_norm": 0.37513813376426697, |
| "learning_rate": 4.735020614926657e-06, |
| "loss": 1.3245, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.05200110889008856, |
| "grad_norm": 0.45673245191574097, |
| "learning_rate": 4.730416710221939e-06, |
| "loss": 0.5902, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.05203029020820871, |
| "grad_norm": 0.37685784697532654, |
| "learning_rate": 4.725813034735064e-06, |
| "loss": 0.6112, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.052059471526328845, |
| "grad_norm": 0.40913015604019165, |
| "learning_rate": 4.721209592380383e-06, |
| "loss": 0.483, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.05208865284444898, |
| "grad_norm": 0.35341158509254456, |
| "learning_rate": 4.7166063870720514e-06, |
| "loss": 0.6337, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.05211783416256913, |
| "grad_norm": 0.3982934057712555, |
| "learning_rate": 4.712003422724028e-06, |
| "loss": 0.8045, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.052147015480689264, |
| "grad_norm": 0.35283780097961426, |
| "learning_rate": 4.707400703250058e-06, |
| "loss": 1.0922, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.0521761967988094, |
| "grad_norm": 0.5964015126228333, |
| "learning_rate": 4.702798232563688e-06, |
| "loss": 0.5912, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.05220537811692954, |
| "grad_norm": 0.5212831497192383, |
| "learning_rate": 4.698196014578241e-06, |
| "loss": 0.9345, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.052234559435049684, |
| "grad_norm": 0.4061563313007355, |
| "learning_rate": 4.693594053206839e-06, |
| "loss": 1.1542, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.05226374075316982, |
| "grad_norm": 0.33173060417175293, |
| "learning_rate": 4.6889923523623765e-06, |
| "loss": 0.8775, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.05229292207128996, |
| "grad_norm": 0.37740442156791687, |
| "learning_rate": 4.6843909159575265e-06, |
| "loss": 0.4707, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.052322103389410096, |
| "grad_norm": 0.3504139482975006, |
| "learning_rate": 4.679789747904744e-06, |
| "loss": 1.0944, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.05235128470753024, |
| "grad_norm": 0.38675037026405334, |
| "learning_rate": 4.67518885211625e-06, |
| "loss": 0.795, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.05238046602565038, |
| "grad_norm": 0.43276306986808777, |
| "learning_rate": 4.670588232504032e-06, |
| "loss": 0.8938, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.052409647343770516, |
| "grad_norm": 0.43321946263313293, |
| "learning_rate": 4.6659878929798515e-06, |
| "loss": 0.7193, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.05243882866189066, |
| "grad_norm": 0.38024991750717163, |
| "learning_rate": 4.661387837455224e-06, |
| "loss": 1.0104, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.0524680099800108, |
| "grad_norm": 0.3712158799171448, |
| "learning_rate": 4.656788069841425e-06, |
| "loss": 0.5663, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.052497191298130935, |
| "grad_norm": 0.3386072516441345, |
| "learning_rate": 4.652188594049488e-06, |
| "loss": 0.5308, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.05252637261625107, |
| "grad_norm": 0.4583183526992798, |
| "learning_rate": 4.647589413990196e-06, |
| "loss": 0.502, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.05255555393437122, |
| "grad_norm": 0.34447282552719116, |
| "learning_rate": 4.64299053357408e-06, |
| "loss": 0.5678, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.052584735252491355, |
| "grad_norm": 0.3625425100326538, |
| "learning_rate": 4.638391956711419e-06, |
| "loss": 0.475, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.05261391657061149, |
| "grad_norm": 0.3461594581604004, |
| "learning_rate": 4.633793687312232e-06, |
| "loss": 0.5148, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.05264309788873164, |
| "grad_norm": 0.33626240491867065, |
| "learning_rate": 4.629195729286276e-06, |
| "loss": 0.6272, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.052672279206851774, |
| "grad_norm": 0.43227168917655945, |
| "learning_rate": 4.624598086543045e-06, |
| "loss": 1.016, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.05270146052497191, |
| "grad_norm": 0.5987937450408936, |
| "learning_rate": 4.620000762991763e-06, |
| "loss": 0.5211, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.05273064184309205, |
| "grad_norm": 0.37128105759620667, |
| "learning_rate": 4.615403762541384e-06, |
| "loss": 0.6478, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.052759823161212194, |
| "grad_norm": 0.3310234844684601, |
| "learning_rate": 4.610807089100588e-06, |
| "loss": 0.5364, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.05278900447933233, |
| "grad_norm": 0.36348482966423035, |
| "learning_rate": 4.6062107465777754e-06, |
| "loss": 1.1472, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.05281818579745247, |
| "grad_norm": 0.40133196115493774, |
| "learning_rate": 4.601614738881066e-06, |
| "loss": 0.6089, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.05284736711557261, |
| "grad_norm": 0.3525753617286682, |
| "learning_rate": 4.597019069918291e-06, |
| "loss": 0.5829, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.05287654843369275, |
| "grad_norm": 0.3545469641685486, |
| "learning_rate": 4.592423743597001e-06, |
| "loss": 1.0589, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.05290572975181289, |
| "grad_norm": 0.36246365308761597, |
| "learning_rate": 4.5878287638244525e-06, |
| "loss": 1.0381, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.052934911069933026, |
| "grad_norm": 0.374603271484375, |
| "learning_rate": 4.583234134507601e-06, |
| "loss": 0.5822, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.05296409238805317, |
| "grad_norm": 0.34840264916419983, |
| "learning_rate": 4.5786398595531146e-06, |
| "loss": 0.4758, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.05299327370617331, |
| "grad_norm": 0.42411431670188904, |
| "learning_rate": 4.574045942867353e-06, |
| "loss": 0.8088, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.053022455024293445, |
| "grad_norm": 0.34652039408683777, |
| "learning_rate": 4.569452388356371e-06, |
| "loss": 0.5226, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.05305163634241359, |
| "grad_norm": 0.32107797265052795, |
| "learning_rate": 4.5648591999259205e-06, |
| "loss": 0.4983, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.05308081766053373, |
| "grad_norm": 0.3383368253707886, |
| "learning_rate": 4.560266381481439e-06, |
| "loss": 0.5637, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.053109998978653865, |
| "grad_norm": 0.45076823234558105, |
| "learning_rate": 4.555673936928046e-06, |
| "loss": 1.1799, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.053139180296774, |
| "grad_norm": 0.3601725995540619, |
| "learning_rate": 4.551081870170551e-06, |
| "loss": 0.5536, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.05316836161489415, |
| "grad_norm": 0.3670978844165802, |
| "learning_rate": 4.546490185113437e-06, |
| "loss": 1.0627, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.053197542933014284, |
| "grad_norm": 0.36346182227134705, |
| "learning_rate": 4.54189888566086e-06, |
| "loss": 0.5895, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.05322672425113442, |
| "grad_norm": 0.3195435702800751, |
| "learning_rate": 4.537307975716655e-06, |
| "loss": 0.4965, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.053255905569254566, |
| "grad_norm": 0.34550654888153076, |
| "learning_rate": 4.532717459184323e-06, |
| "loss": 1.0638, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.053285086887374704, |
| "grad_norm": 0.40399444103240967, |
| "learning_rate": 4.528127339967025e-06, |
| "loss": 1.0648, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.05331426820549484, |
| "grad_norm": 0.4607871174812317, |
| "learning_rate": 4.523537621967596e-06, |
| "loss": 0.8974, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.05334344952361498, |
| "grad_norm": 0.3411993980407715, |
| "learning_rate": 4.518948309088517e-06, |
| "loss": 0.6196, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.05337263084173512, |
| "grad_norm": 0.5008710026741028, |
| "learning_rate": 4.514359405231932e-06, |
| "loss": 0.9201, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.05340181215985526, |
| "grad_norm": 0.3182060420513153, |
| "learning_rate": 4.5097709142996385e-06, |
| "loss": 0.3763, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.0534309934779754, |
| "grad_norm": 0.3759273886680603, |
| "learning_rate": 4.505182840193076e-06, |
| "loss": 0.6374, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.05346017479609554, |
| "grad_norm": 0.40615200996398926, |
| "learning_rate": 4.500595186813336e-06, |
| "loss": 1.0235, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.05348935611421568, |
| "grad_norm": 0.43341973423957825, |
| "learning_rate": 4.496007958061147e-06, |
| "loss": 0.8937, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.05351853743233582, |
| "grad_norm": 0.3654787242412567, |
| "learning_rate": 4.491421157836884e-06, |
| "loss": 0.7309, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.053547718750455955, |
| "grad_norm": 0.39679285883903503, |
| "learning_rate": 4.486834790040551e-06, |
| "loss": 0.7204, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.0535769000685761, |
| "grad_norm": 0.37103912234306335, |
| "learning_rate": 4.482248858571783e-06, |
| "loss": 0.5724, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.05360608138669624, |
| "grad_norm": 0.43709734082221985, |
| "learning_rate": 4.477663367329852e-06, |
| "loss": 1.0683, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.053635262704816375, |
| "grad_norm": 0.41481149196624756, |
| "learning_rate": 4.473078320213649e-06, |
| "loss": 0.9718, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.05366444402293652, |
| "grad_norm": 0.4022238850593567, |
| "learning_rate": 4.468493721121687e-06, |
| "loss": 0.8341, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.05369362534105666, |
| "grad_norm": 0.40236490964889526, |
| "learning_rate": 4.463909573952105e-06, |
| "loss": 0.5343, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.053722806659176794, |
| "grad_norm": 0.34984469413757324, |
| "learning_rate": 4.459325882602652e-06, |
| "loss": 0.6099, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.05375198797729693, |
| "grad_norm": 0.3877089321613312, |
| "learning_rate": 4.454742650970688e-06, |
| "loss": 1.2087, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.053781169295417076, |
| "grad_norm": 0.3181729018688202, |
| "learning_rate": 4.450159882953187e-06, |
| "loss": 1.0057, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.053810350613537214, |
| "grad_norm": 0.34003591537475586, |
| "learning_rate": 4.4455775824467265e-06, |
| "loss": 0.4751, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.05383953193165735, |
| "grad_norm": 0.38645443320274353, |
| "learning_rate": 4.440995753347484e-06, |
| "loss": 0.9841, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.05386871324977749, |
| "grad_norm": 0.44147324562072754, |
| "learning_rate": 4.436414399551242e-06, |
| "loss": 0.5012, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.05389789456789763, |
| "grad_norm": 0.3572762906551361, |
| "learning_rate": 4.431833524953375e-06, |
| "loss": 0.6526, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.05392707588601777, |
| "grad_norm": 0.4037403464317322, |
| "learning_rate": 4.427253133448847e-06, |
| "loss": 0.9816, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.05395625720413791, |
| "grad_norm": 0.339016854763031, |
| "learning_rate": 4.42267322893222e-06, |
| "loss": 0.4498, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.05398543852225805, |
| "grad_norm": 0.4035623073577881, |
| "learning_rate": 4.418093815297634e-06, |
| "loss": 0.9125, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.05401461984037819, |
| "grad_norm": 0.40717682242393494, |
| "learning_rate": 4.413514896438815e-06, |
| "loss": 0.5943, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.05404380115849833, |
| "grad_norm": 0.38769200444221497, |
| "learning_rate": 4.408936476249066e-06, |
| "loss": 0.6407, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.054072982476618466, |
| "grad_norm": 0.3959398567676544, |
| "learning_rate": 4.404358558621271e-06, |
| "loss": 0.6345, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.05410216379473861, |
| "grad_norm": 0.3937602937221527, |
| "learning_rate": 4.3997811474478815e-06, |
| "loss": 0.651, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.05413134511285875, |
| "grad_norm": 0.44510895013809204, |
| "learning_rate": 4.395204246620918e-06, |
| "loss": 1.8454, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.054160526430978885, |
| "grad_norm": 0.34968265891075134, |
| "learning_rate": 4.3906278600319744e-06, |
| "loss": 0.4953, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.05418970774909903, |
| "grad_norm": 0.363343745470047, |
| "learning_rate": 4.3860519915722e-06, |
| "loss": 0.5053, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.05421888906721917, |
| "grad_norm": 0.3688046634197235, |
| "learning_rate": 4.3814766451323025e-06, |
| "loss": 0.6113, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.054248070385339305, |
| "grad_norm": 0.3350222706794739, |
| "learning_rate": 4.376901824602553e-06, |
| "loss": 0.3777, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.05427725170345944, |
| "grad_norm": 0.438566654920578, |
| "learning_rate": 4.372327533872773e-06, |
| "loss": 0.8989, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.054306433021579587, |
| "grad_norm": 0.3766450881958008, |
| "learning_rate": 4.367753776832327e-06, |
| "loss": 0.6956, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.054335614339699724, |
| "grad_norm": 0.5126607418060303, |
| "learning_rate": 4.363180557370134e-06, |
| "loss": 1.102, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.05436479565781986, |
| "grad_norm": 0.37099429965019226, |
| "learning_rate": 4.358607879374653e-06, |
| "loss": 0.5185, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.054393976975940006, |
| "grad_norm": 0.6988083720207214, |
| "learning_rate": 4.35403574673388e-06, |
| "loss": 1.158, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.054423158294060144, |
| "grad_norm": 0.4372864365577698, |
| "learning_rate": 4.349464163335352e-06, |
| "loss": 1.0901, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.05445233961218028, |
| "grad_norm": 0.3921026885509491, |
| "learning_rate": 4.344893133066137e-06, |
| "loss": 0.5182, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.05448152093030042, |
| "grad_norm": 0.3561581075191498, |
| "learning_rate": 4.340322659812829e-06, |
| "loss": 0.5036, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.05451070224842056, |
| "grad_norm": 0.37669849395751953, |
| "learning_rate": 4.335752747461555e-06, |
| "loss": 0.4819, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.0545398835665407, |
| "grad_norm": 0.40359964966773987, |
| "learning_rate": 4.3311833998979605e-06, |
| "loss": 0.5895, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.05456906488466084, |
| "grad_norm": 0.3802253305912018, |
| "learning_rate": 4.3266146210072106e-06, |
| "loss": 0.717, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.05459824620278098, |
| "grad_norm": 0.6598532199859619, |
| "learning_rate": 4.32204641467399e-06, |
| "loss": 1.2725, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.05462742752090112, |
| "grad_norm": 0.5219306945800781, |
| "learning_rate": 4.3174787847824965e-06, |
| "loss": 1.502, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.05465660883902126, |
| "grad_norm": 0.35989290475845337, |
| "learning_rate": 4.312911735216433e-06, |
| "loss": 0.3574, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.054685790157141395, |
| "grad_norm": 0.4038185477256775, |
| "learning_rate": 4.308345269859012e-06, |
| "loss": 0.5321, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.05471497147526154, |
| "grad_norm": 0.45500364899635315, |
| "learning_rate": 4.3037793925929535e-06, |
| "loss": 0.5182, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.05474415279338168, |
| "grad_norm": 0.34241968393325806, |
| "learning_rate": 4.299214107300469e-06, |
| "loss": 1.1453, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.054773334111501815, |
| "grad_norm": 0.3407379984855652, |
| "learning_rate": 4.294649417863273e-06, |
| "loss": 0.4156, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.05480251542962196, |
| "grad_norm": 0.39247187972068787, |
| "learning_rate": 4.290085328162572e-06, |
| "loss": 0.7003, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.0548316967477421, |
| "grad_norm": 0.3492780327796936, |
| "learning_rate": 4.2855218420790605e-06, |
| "loss": 1.529, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.054860878065862234, |
| "grad_norm": 0.35859256982803345, |
| "learning_rate": 4.28095896349292e-06, |
| "loss": 1.0687, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.05489005938398237, |
| "grad_norm": 0.3901832103729248, |
| "learning_rate": 4.276396696283821e-06, |
| "loss": 0.7215, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.054919240702102516, |
| "grad_norm": 0.36397886276245117, |
| "learning_rate": 4.271835044330908e-06, |
| "loss": 0.5018, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.054948422020222654, |
| "grad_norm": 0.4066603481769562, |
| "learning_rate": 4.2672740115128e-06, |
| "loss": 0.5079, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.05497760333834279, |
| "grad_norm": 0.42464321851730347, |
| "learning_rate": 4.262713601707601e-06, |
| "loss": 1.3597, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.055006784656462936, |
| "grad_norm": 0.40859976410865784, |
| "learning_rate": 4.258153818792875e-06, |
| "loss": 0.6379, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.05503596597458307, |
| "grad_norm": 0.4521663188934326, |
| "learning_rate": 4.253594666645652e-06, |
| "loss": 0.767, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.05506514729270321, |
| "grad_norm": 0.37247422337532043, |
| "learning_rate": 4.2490361491424346e-06, |
| "loss": 1.167, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.05509432861082335, |
| "grad_norm": 0.40135982632637024, |
| "learning_rate": 4.244478270159181e-06, |
| "loss": 0.5856, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.05512350992894349, |
| "grad_norm": 0.3823210895061493, |
| "learning_rate": 4.239921033571303e-06, |
| "loss": 1.0653, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.05515269124706363, |
| "grad_norm": 0.32957029342651367, |
| "learning_rate": 4.235364443253672e-06, |
| "loss": 1.0072, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.05518187256518377, |
| "grad_norm": 0.7841724157333374, |
| "learning_rate": 4.230808503080608e-06, |
| "loss": 0.6956, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.05521105388330391, |
| "grad_norm": 0.4058683216571808, |
| "learning_rate": 4.226253216925875e-06, |
| "loss": 1.6456, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.05524023520142405, |
| "grad_norm": 0.4387176036834717, |
| "learning_rate": 4.221698588662686e-06, |
| "loss": 0.8189, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.05526941651954419, |
| "grad_norm": 0.5146316289901733, |
| "learning_rate": 4.217144622163693e-06, |
| "loss": 1.8388, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.055298597837664325, |
| "grad_norm": 0.493606835603714, |
| "learning_rate": 4.212591321300982e-06, |
| "loss": 1.8391, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.05532777915578447, |
| "grad_norm": 0.37362217903137207, |
| "learning_rate": 4.208038689946075e-06, |
| "loss": 1.095, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.05535696047390461, |
| "grad_norm": 0.4597932994365692, |
| "learning_rate": 4.203486731969929e-06, |
| "loss": 0.6474, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.055386141792024744, |
| "grad_norm": 0.34412139654159546, |
| "learning_rate": 4.198935451242922e-06, |
| "loss": 0.5756, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.05541532311014489, |
| "grad_norm": 0.4167275130748749, |
| "learning_rate": 4.194384851634858e-06, |
| "loss": 0.6875, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.055444504428265026, |
| "grad_norm": 0.34333768486976624, |
| "learning_rate": 4.189834937014967e-06, |
| "loss": 0.5378, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.055473685746385164, |
| "grad_norm": 0.374369740486145, |
| "learning_rate": 4.185285711251889e-06, |
| "loss": 0.8511, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.0555028670645053, |
| "grad_norm": 0.45860588550567627, |
| "learning_rate": 4.180737178213681e-06, |
| "loss": 0.6178, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.055532048382625446, |
| "grad_norm": 0.36128926277160645, |
| "learning_rate": 4.176189341767814e-06, |
| "loss": 1.1685, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.05556122970074558, |
| "grad_norm": 0.3898662328720093, |
| "learning_rate": 4.171642205781163e-06, |
| "loss": 0.6073, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.05559041101886572, |
| "grad_norm": 0.4223347306251526, |
| "learning_rate": 4.167095774120009e-06, |
| "loss": 0.6529, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.05561959233698586, |
| "grad_norm": 0.4098593592643738, |
| "learning_rate": 4.162550050650035e-06, |
| "loss": 0.5334, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.055648773655106, |
| "grad_norm": 0.48553964495658875, |
| "learning_rate": 4.158005039236319e-06, |
| "loss": 1.0301, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.05567795497322614, |
| "grad_norm": 0.37733176350593567, |
| "learning_rate": 4.153460743743335e-06, |
| "loss": 1.0532, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.05570713629134628, |
| "grad_norm": 0.3694377839565277, |
| "learning_rate": 4.148917168034952e-06, |
| "loss": 0.5041, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.05573631760946642, |
| "grad_norm": 0.4429541826248169, |
| "learning_rate": 4.1443743159744205e-06, |
| "loss": 1.0759, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.05576549892758656, |
| "grad_norm": 0.4094473421573639, |
| "learning_rate": 4.139832191424378e-06, |
| "loss": 0.9916, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.0557946802457067, |
| "grad_norm": 0.418549507856369, |
| "learning_rate": 4.135290798246848e-06, |
| "loss": 0.6965, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.055823861563826835, |
| "grad_norm": 0.361209511756897, |
| "learning_rate": 4.130750140303226e-06, |
| "loss": 0.545, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.05585304288194698, |
| "grad_norm": 0.3377372622489929, |
| "learning_rate": 4.126210221454284e-06, |
| "loss": 0.4416, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.05588222420006712, |
| "grad_norm": 0.3751803934574127, |
| "learning_rate": 4.121671045560166e-06, |
| "loss": 0.634, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.055911405518187254, |
| "grad_norm": 0.35254189372062683, |
| "learning_rate": 4.1171326164803885e-06, |
| "loss": 0.9137, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.0559405868363074, |
| "grad_norm": 0.3734283745288849, |
| "learning_rate": 4.112594938073824e-06, |
| "loss": 0.9691, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.055969768154427536, |
| "grad_norm": 0.45132020115852356, |
| "learning_rate": 4.108058014198714e-06, |
| "loss": 0.9632, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.055998949472547674, |
| "grad_norm": 0.3871736228466034, |
| "learning_rate": 4.103521848712657e-06, |
| "loss": 0.5401, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.05602813079066781, |
| "grad_norm": 0.43639063835144043, |
| "learning_rate": 4.098986445472606e-06, |
| "loss": 1.0991, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.056057312108787956, |
| "grad_norm": 0.45422810316085815, |
| "learning_rate": 4.0944518083348635e-06, |
| "loss": 0.6888, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.05608649342690809, |
| "grad_norm": 0.3320983052253723, |
| "learning_rate": 4.089917941155086e-06, |
| "loss": 0.9846, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.05611567474502823, |
| "grad_norm": 0.34560737013816833, |
| "learning_rate": 4.08538484778827e-06, |
| "loss": 0.5253, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.056144856063148375, |
| "grad_norm": 0.3539871871471405, |
| "learning_rate": 4.080852532088756e-06, |
| "loss": 0.5417, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.05617403738126851, |
| "grad_norm": 0.419772207736969, |
| "learning_rate": 4.076320997910227e-06, |
| "loss": 0.6708, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.05620321869938865, |
| "grad_norm": 0.38022029399871826, |
| "learning_rate": 4.071790249105693e-06, |
| "loss": 0.7087, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.05623240001750879, |
| "grad_norm": 0.4010125994682312, |
| "learning_rate": 4.0672602895275044e-06, |
| "loss": 0.6099, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.05626158133562893, |
| "grad_norm": 0.400129497051239, |
| "learning_rate": 4.062731123027338e-06, |
| "loss": 1.0314, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.05629076265374907, |
| "grad_norm": 0.3345048725605011, |
| "learning_rate": 4.0582027534561955e-06, |
| "loss": 0.5521, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.05631994397186921, |
| "grad_norm": 0.36296001076698303, |
| "learning_rate": 4.053675184664397e-06, |
| "loss": 0.5817, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.05634912528998935, |
| "grad_norm": 0.44197455048561096, |
| "learning_rate": 4.049148420501593e-06, |
| "loss": 0.5599, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.05637830660810949, |
| "grad_norm": 0.8010590076446533, |
| "learning_rate": 4.0446224648167375e-06, |
| "loss": 0.5527, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.05640748792622963, |
| "grad_norm": 0.3663330376148224, |
| "learning_rate": 4.040097321458103e-06, |
| "loss": 0.5988, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.056436669244349764, |
| "grad_norm": 0.37294459342956543, |
| "learning_rate": 4.035572994273273e-06, |
| "loss": 1.1222, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.05646585056246991, |
| "grad_norm": 0.40139299631118774, |
| "learning_rate": 4.031049487109133e-06, |
| "loss": 0.5146, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.056495031880590046, |
| "grad_norm": 0.32550185918807983, |
| "learning_rate": 4.0265268038118746e-06, |
| "loss": 0.491, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.056524213198710184, |
| "grad_norm": 0.4051690995693207, |
| "learning_rate": 4.022004948226983e-06, |
| "loss": 0.6588, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.05655339451683033, |
| "grad_norm": 0.3714599609375, |
| "learning_rate": 4.017483924199249e-06, |
| "loss": 0.6142, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.056582575834950466, |
| "grad_norm": 0.39528942108154297, |
| "learning_rate": 4.012963735572749e-06, |
| "loss": 0.7769, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.0566117571530706, |
| "grad_norm": 0.3809904158115387, |
| "learning_rate": 4.0084443861908495e-06, |
| "loss": 0.3167, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.05664093847119074, |
| "grad_norm": 0.5041253566741943, |
| "learning_rate": 4.003925879896209e-06, |
| "loss": 0.486, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.056670119789310885, |
| "grad_norm": 0.3907206356525421, |
| "learning_rate": 3.9994082205307635e-06, |
| "loss": 1.7159, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.05669930110743102, |
| "grad_norm": 0.3656666576862335, |
| "learning_rate": 3.994891411935728e-06, |
| "loss": 1.012, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.05672848242555116, |
| "grad_norm": 0.39329928159713745, |
| "learning_rate": 3.9903754579516036e-06, |
| "loss": 1.2044, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.056757663743671305, |
| "grad_norm": 0.3928038775920868, |
| "learning_rate": 3.985860362418152e-06, |
| "loss": 0.5859, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.05678684506179144, |
| "grad_norm": 0.49849358201026917, |
| "learning_rate": 3.981346129174412e-06, |
| "loss": 1.2811, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.05681602637991158, |
| "grad_norm": 0.3930140435695648, |
| "learning_rate": 3.976832762058691e-06, |
| "loss": 1.0709, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.05684520769803172, |
| "grad_norm": 0.4122617542743683, |
| "learning_rate": 3.972320264908555e-06, |
| "loss": 1.4055, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.05687438901615186, |
| "grad_norm": 0.37206342816352844, |
| "learning_rate": 3.967808641560832e-06, |
| "loss": 0.5844, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.056903570334272, |
| "grad_norm": 0.379912793636322, |
| "learning_rate": 3.963297895851611e-06, |
| "loss": 1.1732, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.05693275165239214, |
| "grad_norm": 0.4069420397281647, |
| "learning_rate": 3.958788031616228e-06, |
| "loss": 0.744, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.05696193297051228, |
| "grad_norm": 0.3518264591693878, |
| "learning_rate": 3.954279052689272e-06, |
| "loss": 0.5915, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.05699111428863242, |
| "grad_norm": 0.42304274439811707, |
| "learning_rate": 3.949770962904585e-06, |
| "loss": 0.5336, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.05702029560675256, |
| "grad_norm": 0.3630048334598541, |
| "learning_rate": 3.945263766095242e-06, |
| "loss": 0.6203, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.057049476924872694, |
| "grad_norm": 0.3621942698955536, |
| "learning_rate": 3.940757466093568e-06, |
| "loss": 0.9456, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.05707865824299284, |
| "grad_norm": 0.4334004819393158, |
| "learning_rate": 3.9362520667311236e-06, |
| "loss": 0.4677, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.057107839561112976, |
| "grad_norm": 0.38032007217407227, |
| "learning_rate": 3.9317475718387e-06, |
| "loss": 0.6671, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.057137020879233114, |
| "grad_norm": 0.36980676651000977, |
| "learning_rate": 3.927243985246322e-06, |
| "loss": 0.6018, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.05716620219735325, |
| "grad_norm": 0.37184154987335205, |
| "learning_rate": 3.922741310783241e-06, |
| "loss": 0.6326, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.057195383515473396, |
| "grad_norm": 0.36791786551475525, |
| "learning_rate": 3.918239552277936e-06, |
| "loss": 0.7141, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.05722456483359353, |
| "grad_norm": 0.4822520613670349, |
| "learning_rate": 3.913738713558104e-06, |
| "loss": 0.8042, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.05725374615171367, |
| "grad_norm": 0.3887101411819458, |
| "learning_rate": 3.909238798450659e-06, |
| "loss": 0.7692, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.057282927469833815, |
| "grad_norm": 0.424528032541275, |
| "learning_rate": 3.904739810781732e-06, |
| "loss": 1.2589, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.05731210878795395, |
| "grad_norm": 0.42004871368408203, |
| "learning_rate": 3.900241754376667e-06, |
| "loss": 0.8461, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.05734129010607409, |
| "grad_norm": 0.3491726517677307, |
| "learning_rate": 3.895744633060011e-06, |
| "loss": 0.4344, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.05737047142419423, |
| "grad_norm": 0.34482014179229736, |
| "learning_rate": 3.891248450655521e-06, |
| "loss": 0.3768, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.05739965274231437, |
| "grad_norm": 0.32053661346435547, |
| "learning_rate": 3.886753210986154e-06, |
| "loss": 0.6142, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.05742883406043451, |
| "grad_norm": 0.41045013070106506, |
| "learning_rate": 3.882258917874062e-06, |
| "loss": 0.4414, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.05745801537855465, |
| "grad_norm": 0.3954102694988251, |
| "learning_rate": 3.877765575140598e-06, |
| "loss": 0.4783, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.05748719669667479, |
| "grad_norm": 0.3864537477493286, |
| "learning_rate": 3.873273186606304e-06, |
| "loss": 1.4345, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.05751637801479493, |
| "grad_norm": 0.45470911264419556, |
| "learning_rate": 3.868781756090909e-06, |
| "loss": 0.9873, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.05754555933291507, |
| "grad_norm": 0.396078884601593, |
| "learning_rate": 3.8642912874133324e-06, |
| "loss": 0.5869, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.057574740651035204, |
| "grad_norm": 0.3716507852077484, |
| "learning_rate": 3.859801784391669e-06, |
| "loss": 0.6326, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.05760392196915535, |
| "grad_norm": 0.4285648465156555, |
| "learning_rate": 3.8553132508431965e-06, |
| "loss": 1.4521, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.057633103287275486, |
| "grad_norm": 0.4111904799938202, |
| "learning_rate": 3.8508256905843725e-06, |
| "loss": 0.8535, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.057662284605395624, |
| "grad_norm": 0.2928890883922577, |
| "learning_rate": 3.8463391074308175e-06, |
| "loss": 0.5198, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.05769146592351577, |
| "grad_norm": 0.36430418491363525, |
| "learning_rate": 3.841853505197326e-06, |
| "loss": 0.5077, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.057720647241635906, |
| "grad_norm": 0.3572177290916443, |
| "learning_rate": 3.837368887697863e-06, |
| "loss": 0.4554, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.05774982855975604, |
| "grad_norm": 0.5488017201423645, |
| "learning_rate": 3.832885258745549e-06, |
| "loss": 0.6071, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.05777900987787618, |
| "grad_norm": 0.411590039730072, |
| "learning_rate": 3.828402622152668e-06, |
| "loss": 0.5628, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.057808191195996325, |
| "grad_norm": 0.33833980560302734, |
| "learning_rate": 3.823920981730656e-06, |
| "loss": 0.5975, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.05783737251411646, |
| "grad_norm": 0.3586738109588623, |
| "learning_rate": 3.819440341290108e-06, |
| "loss": 0.979, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.0578665538322366, |
| "grad_norm": 0.46520721912384033, |
| "learning_rate": 3.8149607046407665e-06, |
| "loss": 1.5342, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.057895735150356745, |
| "grad_norm": 0.32175302505493164, |
| "learning_rate": 3.8104820755915166e-06, |
| "loss": 0.4658, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.05792491646847688, |
| "grad_norm": 0.37614285945892334, |
| "learning_rate": 3.8060044579503926e-06, |
| "loss": 0.6913, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.05795409778659702, |
| "grad_norm": 0.36752036213874817, |
| "learning_rate": 3.8015278555245657e-06, |
| "loss": 0.4291, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.05798327910471716, |
| "grad_norm": 0.5331560373306274, |
| "learning_rate": 3.797052272120342e-06, |
| "loss": 1.0343, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.0580124604228373, |
| "grad_norm": 0.4381938874721527, |
| "learning_rate": 3.7925777115431663e-06, |
| "loss": 0.9085, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.05804164174095744, |
| "grad_norm": 0.640721321105957, |
| "learning_rate": 3.78810417759761e-06, |
| "loss": 1.1147, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.05807082305907758, |
| "grad_norm": 0.32372066378593445, |
| "learning_rate": 3.7836316740873714e-06, |
| "loss": 0.7606, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.05810000437719772, |
| "grad_norm": 0.3875253200531006, |
| "learning_rate": 3.779160204815275e-06, |
| "loss": 0.5897, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.05812918569531786, |
| "grad_norm": 0.4967852234840393, |
| "learning_rate": 3.774689773583265e-06, |
| "loss": 0.3453, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.058158367013437996, |
| "grad_norm": 0.34489792585372925, |
| "learning_rate": 3.7702203841924003e-06, |
| "loss": 0.6976, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.058187548331558134, |
| "grad_norm": 0.36758995056152344, |
| "learning_rate": 3.765752040442858e-06, |
| "loss": 0.5806, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.05821672964967828, |
| "grad_norm": 0.3414762020111084, |
| "learning_rate": 3.7612847461339263e-06, |
| "loss": 0.7096, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.058245910967798416, |
| "grad_norm": 0.39971426129341125, |
| "learning_rate": 3.756818505063995e-06, |
| "loss": 0.9451, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.05827509228591855, |
| "grad_norm": 0.41015398502349854, |
| "learning_rate": 3.752353321030565e-06, |
| "loss": 1.2059, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.0583042736040387, |
| "grad_norm": 0.36627599596977234, |
| "learning_rate": 3.747889197830238e-06, |
| "loss": 1.1214, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.058333454922158835, |
| "grad_norm": 0.426281601190567, |
| "learning_rate": 3.743426139258709e-06, |
| "loss": 2.5065, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.05836263624027897, |
| "grad_norm": 0.37110453844070435, |
| "learning_rate": 3.7389641491107677e-06, |
| "loss": 0.6791, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.05839181755839911, |
| "grad_norm": 0.29426029324531555, |
| "learning_rate": 3.734503231180304e-06, |
| "loss": 0.8602, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.058420998876519255, |
| "grad_norm": 0.3624902665615082, |
| "learning_rate": 3.730043389260288e-06, |
| "loss": 0.4638, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.05845018019463939, |
| "grad_norm": 0.40695860981941223, |
| "learning_rate": 3.7255846271427752e-06, |
| "loss": 1.0649, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.05847936151275953, |
| "grad_norm": 0.33267369866371155, |
| "learning_rate": 3.7211269486189085e-06, |
| "loss": 1.2341, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.058508542830879674, |
| "grad_norm": 0.3531542122364044, |
| "learning_rate": 3.7166703574789042e-06, |
| "loss": 0.4348, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.05853772414899981, |
| "grad_norm": 0.42720696330070496, |
| "learning_rate": 3.7122148575120532e-06, |
| "loss": 0.9152, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.05856690546711995, |
| "grad_norm": 0.34199726581573486, |
| "learning_rate": 3.7077604525067256e-06, |
| "loss": 0.4743, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.05859608678524009, |
| "grad_norm": 0.38692760467529297, |
| "learning_rate": 3.7033071462503546e-06, |
| "loss": 0.6114, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.05862526810336023, |
| "grad_norm": 0.3993266522884369, |
| "learning_rate": 3.6988549425294385e-06, |
| "loss": 0.9443, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.05865444942148037, |
| "grad_norm": 0.37488895654678345, |
| "learning_rate": 3.6944038451295423e-06, |
| "loss": 0.5313, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.058683630739600506, |
| "grad_norm": 0.3746214509010315, |
| "learning_rate": 3.689953857835289e-06, |
| "loss": 0.6184, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.058712812057720644, |
| "grad_norm": 0.3636399507522583, |
| "learning_rate": 3.6855049844303546e-06, |
| "loss": 1.0902, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.05874199337584079, |
| "grad_norm": 0.3949470520019531, |
| "learning_rate": 3.681057228697472e-06, |
| "loss": 0.6768, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.058771174693960926, |
| "grad_norm": 0.34592562913894653, |
| "learning_rate": 3.676610594418424e-06, |
| "loss": 0.5659, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.05880035601208106, |
| "grad_norm": 0.5117006897926331, |
| "learning_rate": 3.6721650853740333e-06, |
| "loss": 1.6459, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.05882953733020121, |
| "grad_norm": 0.39747756719589233, |
| "learning_rate": 3.6677207053441737e-06, |
| "loss": 0.4768, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.058858718648321345, |
| "grad_norm": 0.3781323730945587, |
| "learning_rate": 3.6632774581077577e-06, |
| "loss": 0.5884, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.05888789996644148, |
| "grad_norm": 0.406512588262558, |
| "learning_rate": 3.658835347442728e-06, |
| "loss": 1.456, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.05891708128456162, |
| "grad_norm": 0.3331405818462372, |
| "learning_rate": 3.65439437712607e-06, |
| "loss": 1.0464, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.058946262602681765, |
| "grad_norm": 0.34715914726257324, |
| "learning_rate": 3.649954550933795e-06, |
| "loss": 0.4919, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.0589754439208019, |
| "grad_norm": 0.6263701915740967, |
| "learning_rate": 3.645515872640939e-06, |
| "loss": 1.0162, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.05900462523892204, |
| "grad_norm": 0.380294531583786, |
| "learning_rate": 3.641078346021566e-06, |
| "loss": 0.9757, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.059033806557042184, |
| "grad_norm": 0.35354727506637573, |
| "learning_rate": 3.6366419748487614e-06, |
| "loss": 0.4578, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.05906298787516232, |
| "grad_norm": 0.4559861421585083, |
| "learning_rate": 3.6322067628946234e-06, |
| "loss": 1.1305, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.05909216919328246, |
| "grad_norm": 0.4016369879245758, |
| "learning_rate": 3.6277727139302686e-06, |
| "loss": 0.823, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.0591213505114026, |
| "grad_norm": 0.3946945369243622, |
| "learning_rate": 3.6233398317258258e-06, |
| "loss": 0.7454, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.05915053182952274, |
| "grad_norm": 0.41596707701683044, |
| "learning_rate": 3.618908120050426e-06, |
| "loss": 1.1742, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.05917971314764288, |
| "grad_norm": 0.40080493688583374, |
| "learning_rate": 3.614477582672209e-06, |
| "loss": 0.5426, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.059208894465763016, |
| "grad_norm": 0.27254611253738403, |
| "learning_rate": 3.610048223358319e-06, |
| "loss": 0.5847, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.05923807578388316, |
| "grad_norm": 0.40630316734313965, |
| "learning_rate": 3.6056200458748923e-06, |
| "loss": 0.6148, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.0592672571020033, |
| "grad_norm": 0.35947859287261963, |
| "learning_rate": 3.6011930539870615e-06, |
| "loss": 0.5263, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.059296438420123436, |
| "grad_norm": 0.368412047624588, |
| "learning_rate": 3.596767251458956e-06, |
| "loss": 0.4562, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.059325619738243573, |
| "grad_norm": 0.393402099609375, |
| "learning_rate": 3.5923426420536907e-06, |
| "loss": 1.2302, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.05935480105636372, |
| "grad_norm": 0.45544981956481934, |
| "learning_rate": 3.587919229533362e-06, |
| "loss": 0.7995, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.059383982374483855, |
| "grad_norm": 0.3554464876651764, |
| "learning_rate": 3.583497017659058e-06, |
| "loss": 0.7513, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.05941316369260399, |
| "grad_norm": 0.3423815071582794, |
| "learning_rate": 3.5790760101908384e-06, |
| "loss": 0.5747, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.05944234501072414, |
| "grad_norm": 0.377187043428421, |
| "learning_rate": 3.574656210887738e-06, |
| "loss": 1.0689, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.059471526328844275, |
| "grad_norm": 0.37913021445274353, |
| "learning_rate": 3.5702376235077716e-06, |
| "loss": 0.5395, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.05950070764696441, |
| "grad_norm": 0.3623841404914856, |
| "learning_rate": 3.56582025180792e-06, |
| "loss": 0.4939, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.05952988896508455, |
| "grad_norm": 0.36248263716697693, |
| "learning_rate": 3.561404099544124e-06, |
| "loss": 0.5806, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.059559070283204694, |
| "grad_norm": 0.33705687522888184, |
| "learning_rate": 3.5569891704712983e-06, |
| "loss": 0.6256, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.05958825160132483, |
| "grad_norm": 0.34825190901756287, |
| "learning_rate": 3.5525754683433122e-06, |
| "loss": 0.663, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.05961743291944497, |
| "grad_norm": 0.3465997278690338, |
| "learning_rate": 3.5481629969129895e-06, |
| "loss": 0.9509, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.059646614237565114, |
| "grad_norm": 0.3999250531196594, |
| "learning_rate": 3.5437517599321092e-06, |
| "loss": 0.721, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.05967579555568525, |
| "grad_norm": 0.4010320007801056, |
| "learning_rate": 3.539341761151407e-06, |
| "loss": 0.5923, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.05970497687380539, |
| "grad_norm": 0.39981862902641296, |
| "learning_rate": 3.5349330043205555e-06, |
| "loss": 1.0829, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.05973415819192553, |
| "grad_norm": 0.4079881012439728, |
| "learning_rate": 3.5305254931881772e-06, |
| "loss": 0.6793, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.05976333951004567, |
| "grad_norm": 0.41426578164100647, |
| "learning_rate": 3.5261192315018365e-06, |
| "loss": 0.7014, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.05979252082816581, |
| "grad_norm": 0.5225511193275452, |
| "learning_rate": 3.5217142230080313e-06, |
| "loss": 0.8797, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.059821702146285946, |
| "grad_norm": 0.4548954963684082, |
| "learning_rate": 3.5173104714521955e-06, |
| "loss": 1.3161, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.05985088346440609, |
| "grad_norm": 0.3670787513256073, |
| "learning_rate": 3.512907980578698e-06, |
| "loss": 0.4437, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.05988006478252623, |
| "grad_norm": 0.37493085861206055, |
| "learning_rate": 3.5085067541308303e-06, |
| "loss": 0.62, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.059909246100646366, |
| "grad_norm": 0.33322057127952576, |
| "learning_rate": 3.504106795850809e-06, |
| "loss": 0.4961, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.0599384274187665, |
| "grad_norm": 0.3718531131744385, |
| "learning_rate": 3.4997081094797796e-06, |
| "loss": 0.4671, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.05996760873688665, |
| "grad_norm": 0.35541006922721863, |
| "learning_rate": 3.4953106987577966e-06, |
| "loss": 0.6469, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.059996790055006785, |
| "grad_norm": 0.3479944169521332, |
| "learning_rate": 3.4909145674238353e-06, |
| "loss": 0.5606, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.06002597137312692, |
| "grad_norm": 0.34883078932762146, |
| "learning_rate": 3.486519719215785e-06, |
| "loss": 0.4301, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.06005515269124707, |
| "grad_norm": 0.36048266291618347, |
| "learning_rate": 3.482126157870439e-06, |
| "loss": 0.6564, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.060084334009367205, |
| "grad_norm": 0.32860925793647766, |
| "learning_rate": 3.4777338871234944e-06, |
| "loss": 0.3749, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.06011351532748734, |
| "grad_norm": 0.385186105966568, |
| "learning_rate": 3.4733429107095636e-06, |
| "loss": 1.0554, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.06014269664560748, |
| "grad_norm": 0.4215574264526367, |
| "learning_rate": 3.4689532323621446e-06, |
| "loss": 0.666, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.060171877963727624, |
| "grad_norm": 0.44166120886802673, |
| "learning_rate": 3.464564855813637e-06, |
| "loss": 0.6512, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.06020105928184776, |
| "grad_norm": 0.375983864068985, |
| "learning_rate": 3.460177784795335e-06, |
| "loss": 0.6855, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.0602302405999679, |
| "grad_norm": 0.40821775794029236, |
| "learning_rate": 3.4557920230374227e-06, |
| "loss": 1.1705, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.06025942191808804, |
| "grad_norm": 0.36252719163894653, |
| "learning_rate": 3.4514075742689667e-06, |
| "loss": 0.9223, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.06028860323620818, |
| "grad_norm": 0.3632132112979889, |
| "learning_rate": 3.447024442217921e-06, |
| "loss": 0.5949, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.06031778455432832, |
| "grad_norm": 0.6904483437538147, |
| "learning_rate": 3.4426426306111217e-06, |
| "loss": 1.9521, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.060346965872448456, |
| "grad_norm": 0.39848795533180237, |
| "learning_rate": 3.438262143174276e-06, |
| "loss": 0.5959, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.0603761471905686, |
| "grad_norm": 0.4124288260936737, |
| "learning_rate": 3.4338829836319705e-06, |
| "loss": 0.6701, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.06040532850868874, |
| "grad_norm": 0.40721356868743896, |
| "learning_rate": 3.429505155707663e-06, |
| "loss": 1.1352, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.060434509826808876, |
| "grad_norm": 0.43586406111717224, |
| "learning_rate": 3.425128663123674e-06, |
| "loss": 0.8024, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.06046369114492901, |
| "grad_norm": 0.38717782497406006, |
| "learning_rate": 3.4207535096011924e-06, |
| "loss": 0.529, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.06049287246304916, |
| "grad_norm": 0.4327870309352875, |
| "learning_rate": 3.4163796988602702e-06, |
| "loss": 0.5836, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.060522053781169295, |
| "grad_norm": 0.3442043662071228, |
| "learning_rate": 3.412007234619814e-06, |
| "loss": 1.1117, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.06055123509928943, |
| "grad_norm": 0.37794965505599976, |
| "learning_rate": 3.407636120597585e-06, |
| "loss": 0.5469, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.06058041641740958, |
| "grad_norm": 0.32557815313339233, |
| "learning_rate": 3.4032663605102023e-06, |
| "loss": 0.3716, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.060609597735529715, |
| "grad_norm": 0.3767428398132324, |
| "learning_rate": 3.3988979580731262e-06, |
| "loss": 0.5219, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.06063877905364985, |
| "grad_norm": 0.3935869038105011, |
| "learning_rate": 3.3945309170006667e-06, |
| "loss": 1.3649, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.06066796037176999, |
| "grad_norm": 0.43829345703125, |
| "learning_rate": 3.3901652410059794e-06, |
| "loss": 0.791, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.060697141689890134, |
| "grad_norm": 0.44142934679985046, |
| "learning_rate": 3.385800933801051e-06, |
| "loss": 1.0685, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.06072632300801027, |
| "grad_norm": 0.3981931209564209, |
| "learning_rate": 3.3814379990967094e-06, |
| "loss": 1.2067, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.06075550432613041, |
| "grad_norm": 0.4052174985408783, |
| "learning_rate": 3.377076440602619e-06, |
| "loss": 0.8231, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.060784685644250554, |
| "grad_norm": 0.4572196304798126, |
| "learning_rate": 3.3727162620272662e-06, |
| "loss": 1.4312, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.06081386696237069, |
| "grad_norm": 0.4845515787601471, |
| "learning_rate": 3.3683574670779704e-06, |
| "loss": 0.8968, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.06084304828049083, |
| "grad_norm": 0.375808447599411, |
| "learning_rate": 3.364000059460869e-06, |
| "loss": 1.1574, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.060872229598610966, |
| "grad_norm": 0.4806961715221405, |
| "learning_rate": 3.3596440428809264e-06, |
| "loss": 0.5845, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.06090141091673111, |
| "grad_norm": 0.3441624641418457, |
| "learning_rate": 3.3552894210419196e-06, |
| "loss": 0.4982, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.06093059223485125, |
| "grad_norm": 0.36816534399986267, |
| "learning_rate": 3.3509361976464393e-06, |
| "loss": 0.7915, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.060959773552971386, |
| "grad_norm": 0.3683021068572998, |
| "learning_rate": 3.3465843763958917e-06, |
| "loss": 1.0985, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.06098895487109153, |
| "grad_norm": 0.33681520819664, |
| "learning_rate": 3.342233960990486e-06, |
| "loss": 0.9354, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.06101813618921167, |
| "grad_norm": 0.5260627269744873, |
| "learning_rate": 3.337884955129237e-06, |
| "loss": 0.5787, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.061047317507331805, |
| "grad_norm": 0.34252187609672546, |
| "learning_rate": 3.333537362509965e-06, |
| "loss": 0.676, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.06107649882545194, |
| "grad_norm": 0.3894500136375427, |
| "learning_rate": 3.3291911868292823e-06, |
| "loss": 0.5627, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.06110568014357209, |
| "grad_norm": 0.6068259477615356, |
| "learning_rate": 3.3248464317826007e-06, |
| "loss": 1.421, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.061134861461692225, |
| "grad_norm": 0.3254760801792145, |
| "learning_rate": 3.3205031010641254e-06, |
| "loss": 0.3292, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.06116404277981236, |
| "grad_norm": 0.42579495906829834, |
| "learning_rate": 3.316161198366845e-06, |
| "loss": 1.1003, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.06119322409793251, |
| "grad_norm": 0.3681286573410034, |
| "learning_rate": 3.311820727382539e-06, |
| "loss": 1.1427, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.061222405416052644, |
| "grad_norm": 0.3053743243217468, |
| "learning_rate": 3.3074816918017678e-06, |
| "loss": 0.599, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.06125158673417278, |
| "grad_norm": 0.32214224338531494, |
| "learning_rate": 3.3031440953138695e-06, |
| "loss": 1.6025, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.06128076805229292, |
| "grad_norm": 0.3452838957309723, |
| "learning_rate": 3.298807941606961e-06, |
| "loss": 0.6428, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.061309949370413064, |
| "grad_norm": 0.31177929043769836, |
| "learning_rate": 3.2944732343679342e-06, |
| "loss": 0.4006, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.0613391306885332, |
| "grad_norm": 0.40319034457206726, |
| "learning_rate": 3.290139977282445e-06, |
| "loss": 0.5297, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.06136831200665334, |
| "grad_norm": 0.4573376476764679, |
| "learning_rate": 3.2858081740349212e-06, |
| "loss": 0.3914, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.06139749332477348, |
| "grad_norm": 0.3692920207977295, |
| "learning_rate": 3.2814778283085558e-06, |
| "loss": 0.6554, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.06142667464289362, |
| "grad_norm": 0.336704283952713, |
| "learning_rate": 3.2771489437852962e-06, |
| "loss": 0.4431, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.06145585596101376, |
| "grad_norm": 0.39836594462394714, |
| "learning_rate": 3.272821524145854e-06, |
| "loss": 0.6818, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.061485037279133896, |
| "grad_norm": 0.4600656032562256, |
| "learning_rate": 3.2684955730696884e-06, |
| "loss": 0.9989, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.06151421859725404, |
| "grad_norm": 0.3948930501937866, |
| "learning_rate": 3.2641710942350173e-06, |
| "loss": 1.1482, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.06154339991537418, |
| "grad_norm": 0.44119468331336975, |
| "learning_rate": 3.2598480913188047e-06, |
| "loss": 0.6025, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.061572581233494315, |
| "grad_norm": 0.40817445516586304, |
| "learning_rate": 3.255526567996753e-06, |
| "loss": 1.1296, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.06160176255161446, |
| "grad_norm": 0.3587343990802765, |
| "learning_rate": 3.251206527943317e-06, |
| "loss": 0.6331, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.0616309438697346, |
| "grad_norm": 0.387046217918396, |
| "learning_rate": 3.246887974831684e-06, |
| "loss": 0.5536, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.061660125187854735, |
| "grad_norm": 0.35376015305519104, |
| "learning_rate": 3.2425709123337755e-06, |
| "loss": 0.5797, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.06168930650597487, |
| "grad_norm": 0.4045913517475128, |
| "learning_rate": 3.2382553441202513e-06, |
| "loss": 1.4825, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.06171848782409502, |
| "grad_norm": 0.35432296991348267, |
| "learning_rate": 3.2339412738604984e-06, |
| "loss": 0.489, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.061747669142215154, |
| "grad_norm": 0.39025989174842834, |
| "learning_rate": 3.229628705222626e-06, |
| "loss": 0.8269, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.06177685046033529, |
| "grad_norm": 0.3491201400756836, |
| "learning_rate": 3.2253176418734733e-06, |
| "loss": 0.5148, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.06180603177845543, |
| "grad_norm": 0.4431854486465454, |
| "learning_rate": 3.221008087478594e-06, |
| "loss": 0.6889, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.061835213096575574, |
| "grad_norm": 0.3849260210990906, |
| "learning_rate": 3.2167000457022603e-06, |
| "loss": 0.4761, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.06186439441469571, |
| "grad_norm": 0.421662300825119, |
| "learning_rate": 3.212393520207463e-06, |
| "loss": 1.2273, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.06189357573281585, |
| "grad_norm": 0.3457053303718567, |
| "learning_rate": 3.208088514655896e-06, |
| "loss": 0.5293, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.06192275705093599, |
| "grad_norm": 0.34606316685676575, |
| "learning_rate": 3.2037850327079637e-06, |
| "loss": 0.632, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.06195193836905613, |
| "grad_norm": 0.36225220561027527, |
| "learning_rate": 3.199483078022781e-06, |
| "loss": 0.7166, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.06198111968717627, |
| "grad_norm": 0.4316044747829437, |
| "learning_rate": 3.1951826542581544e-06, |
| "loss": 0.5265, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.062010301005296406, |
| "grad_norm": 0.45886167883872986, |
| "learning_rate": 3.1908837650705935e-06, |
| "loss": 1.8024, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.06203948232341655, |
| "grad_norm": 0.3482613265514374, |
| "learning_rate": 3.1865864141153068e-06, |
| "loss": 1.0219, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.06206866364153669, |
| "grad_norm": 0.33983346819877625, |
| "learning_rate": 3.1822906050461867e-06, |
| "loss": 0.5453, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.062097844959656825, |
| "grad_norm": 0.38761284947395325, |
| "learning_rate": 3.177996341515822e-06, |
| "loss": 1.1932, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.06212702627777697, |
| "grad_norm": 0.3563324213027954, |
| "learning_rate": 3.1737036271754806e-06, |
| "loss": 0.6645, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.06215620759589711, |
| "grad_norm": 0.40343236923217773, |
| "learning_rate": 3.169412465675121e-06, |
| "loss": 1.019, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.062185388914017245, |
| "grad_norm": 0.31767743825912476, |
| "learning_rate": 3.1651228606633766e-06, |
| "loss": 1.8443, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.06221457023213738, |
| "grad_norm": 0.3186255097389221, |
| "learning_rate": 3.160834815787554e-06, |
| "loss": 0.4437, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.06224375155025753, |
| "grad_norm": 0.35094353556632996, |
| "learning_rate": 3.156548334693642e-06, |
| "loss": 0.4198, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.062272932868377665, |
| "grad_norm": 0.40495699644088745, |
| "learning_rate": 3.1522634210262937e-06, |
| "loss": 0.6789, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.0623021141864978, |
| "grad_norm": 0.3583487272262573, |
| "learning_rate": 3.1479800784288288e-06, |
| "loss": 1.2129, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.062331295504617946, |
| "grad_norm": 0.41153812408447266, |
| "learning_rate": 3.143698310543236e-06, |
| "loss": 1.1242, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.062360476822738084, |
| "grad_norm": 0.41002485156059265, |
| "learning_rate": 3.1394181210101614e-06, |
| "loss": 0.6377, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.06238965814085822, |
| "grad_norm": 0.38274410367012024, |
| "learning_rate": 3.135139513468909e-06, |
| "loss": 0.6045, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.06241883945897836, |
| "grad_norm": 0.38776078820228577, |
| "learning_rate": 3.130862491557439e-06, |
| "loss": 0.5413, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.062448020777098504, |
| "grad_norm": 0.33805227279663086, |
| "learning_rate": 3.1265870589123647e-06, |
| "loss": 0.8001, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.06247720209521864, |
| "grad_norm": 0.37673822045326233, |
| "learning_rate": 3.122313219168943e-06, |
| "loss": 1.015, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.06250638341333878, |
| "grad_norm": 1.0381897687911987, |
| "learning_rate": 3.1180409759610823e-06, |
| "loss": 0.5086, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.06253556473145892, |
| "grad_norm": 0.47404688596725464, |
| "learning_rate": 3.113770332921332e-06, |
| "loss": 0.5907, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.06256474604957905, |
| "grad_norm": 0.2575134336948395, |
| "learning_rate": 3.1095012936808772e-06, |
| "loss": 0.9414, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.0625939273676992, |
| "grad_norm": 0.3013460338115692, |
| "learning_rate": 3.105233861869544e-06, |
| "loss": 0.3504, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.06262310868581934, |
| "grad_norm": 0.47241297364234924, |
| "learning_rate": 3.1009680411157907e-06, |
| "loss": 1.1087, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.06265229000393947, |
| "grad_norm": 0.3396003246307373, |
| "learning_rate": 3.096703835046703e-06, |
| "loss": 0.5008, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.06268147132205962, |
| "grad_norm": 0.5099637508392334, |
| "learning_rate": 3.0924412472879946e-06, |
| "loss": 0.6721, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.06271065264017976, |
| "grad_norm": 0.3837086260318756, |
| "learning_rate": 3.0881802814640066e-06, |
| "loss": 0.5322, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.06273983395829989, |
| "grad_norm": 1.8514056205749512, |
| "learning_rate": 3.083920941197699e-06, |
| "loss": 0.852, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.06276901527642004, |
| "grad_norm": 0.5314767956733704, |
| "learning_rate": 3.079663230110647e-06, |
| "loss": 1.216, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.06279819659454018, |
| "grad_norm": 0.39074236154556274, |
| "learning_rate": 3.0754071518230446e-06, |
| "loss": 0.6562, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.06282737791266031, |
| "grad_norm": 0.39116254448890686, |
| "learning_rate": 3.0711527099536958e-06, |
| "loss": 1.0678, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.06285655923078046, |
| "grad_norm": 0.362162709236145, |
| "learning_rate": 3.0668999081200113e-06, |
| "loss": 0.5859, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.06288574054890059, |
| "grad_norm": 0.35680273175239563, |
| "learning_rate": 3.062648749938011e-06, |
| "loss": 1.0341, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.06291492186702073, |
| "grad_norm": 0.4902818500995636, |
| "learning_rate": 3.058399239022316e-06, |
| "loss": 0.434, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.06294410318514088, |
| "grad_norm": 0.3766494393348694, |
| "learning_rate": 3.0541513789861432e-06, |
| "loss": 0.6276, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.062973284503261, |
| "grad_norm": 0.3613470494747162, |
| "learning_rate": 3.049905173441312e-06, |
| "loss": 0.5353, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.06300246582138115, |
| "grad_norm": 0.39713186025619507, |
| "learning_rate": 3.0456606259982323e-06, |
| "loss": 0.572, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.0630316471395013, |
| "grad_norm": 0.35310646891593933, |
| "learning_rate": 3.0414177402658995e-06, |
| "loss": 1.0338, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.06306082845762143, |
| "grad_norm": 0.4612269103527069, |
| "learning_rate": 3.0371765198519038e-06, |
| "loss": 0.603, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.06309000977574157, |
| "grad_norm": 0.37410804629325867, |
| "learning_rate": 3.0329369683624167e-06, |
| "loss": 0.7313, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.06311919109386172, |
| "grad_norm": 0.3971019387245178, |
| "learning_rate": 3.0286990894021855e-06, |
| "loss": 1.2541, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.06314837241198185, |
| "grad_norm": 0.6406969428062439, |
| "learning_rate": 3.0244628865745444e-06, |
| "loss": 1.4167, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.06317755373010199, |
| "grad_norm": 0.4686782956123352, |
| "learning_rate": 3.0202283634813976e-06, |
| "loss": 0.4171, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.06320673504822213, |
| "grad_norm": 0.45741724967956543, |
| "learning_rate": 3.0159955237232174e-06, |
| "loss": 0.6919, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.06323591636634227, |
| "grad_norm": 0.3061526119709015, |
| "learning_rate": 3.011764370899053e-06, |
| "loss": 0.4576, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.06326509768446241, |
| "grad_norm": 0.389623761177063, |
| "learning_rate": 3.007534908606514e-06, |
| "loss": 0.5331, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.06329427900258254, |
| "grad_norm": 0.31802383065223694, |
| "learning_rate": 3.0033071404417723e-06, |
| "loss": 0.3926, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.06332346032070268, |
| "grad_norm": 0.3451498746871948, |
| "learning_rate": 2.9990810699995608e-06, |
| "loss": 1.0066, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.06335264163882283, |
| "grad_norm": 0.32362866401672363, |
| "learning_rate": 2.994856700873171e-06, |
| "loss": 0.9975, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.06338182295694296, |
| "grad_norm": 0.33401358127593994, |
| "learning_rate": 2.9906340366544427e-06, |
| "loss": 0.4309, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.0634110042750631, |
| "grad_norm": 0.49933385848999023, |
| "learning_rate": 2.98641308093377e-06, |
| "loss": 0.6316, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.06344018559318325, |
| "grad_norm": 0.36883747577667236, |
| "learning_rate": 2.9821938373000957e-06, |
| "loss": 0.565, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.06346936691130338, |
| "grad_norm": 0.3524225950241089, |
| "learning_rate": 2.977976309340902e-06, |
| "loss": 1.0099, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.06349854822942352, |
| "grad_norm": 0.3187222480773926, |
| "learning_rate": 2.973760500642212e-06, |
| "loss": 0.4277, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.06352772954754367, |
| "grad_norm": 0.34643998742103577, |
| "learning_rate": 2.9695464147885963e-06, |
| "loss": 0.3281, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.0635569108656638, |
| "grad_norm": 0.3783508837223053, |
| "learning_rate": 2.96533405536315e-06, |
| "loss": 0.5558, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.06358609218378394, |
| "grad_norm": 0.3579278290271759, |
| "learning_rate": 2.961123425947503e-06, |
| "loss": 0.5917, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.06361527350190409, |
| "grad_norm": 0.3152145743370056, |
| "learning_rate": 2.956914530121819e-06, |
| "loss": 0.5916, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.06364445482002422, |
| "grad_norm": 0.34904199838638306, |
| "learning_rate": 2.952707371464783e-06, |
| "loss": 0.5051, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.06367363613814436, |
| "grad_norm": 0.34758520126342773, |
| "learning_rate": 2.948501953553602e-06, |
| "loss": 1.0571, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.0637028174562645, |
| "grad_norm": 0.5069554448127747, |
| "learning_rate": 2.9442982799640083e-06, |
| "loss": 1.6586, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.06373199877438464, |
| "grad_norm": 0.3244377076625824, |
| "learning_rate": 2.9400963542702477e-06, |
| "loss": 0.4786, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.06376118009250478, |
| "grad_norm": 0.36959919333457947, |
| "learning_rate": 2.935896180045077e-06, |
| "loss": 0.5654, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.06379036141062491, |
| "grad_norm": 0.3787088692188263, |
| "learning_rate": 2.9316977608597687e-06, |
| "loss": 0.533, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.06381954272874506, |
| "grad_norm": 0.36045530438423157, |
| "learning_rate": 2.9275011002841026e-06, |
| "loss": 0.5307, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.0638487240468652, |
| "grad_norm": 0.4458478093147278, |
| "learning_rate": 2.9233062018863577e-06, |
| "loss": 0.5184, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.06387790536498533, |
| "grad_norm": 0.34621813893318176, |
| "learning_rate": 2.9191130692333237e-06, |
| "loss": 0.5244, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.06390708668310548, |
| "grad_norm": 0.42607399821281433, |
| "learning_rate": 2.914921705890279e-06, |
| "loss": 1.2018, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.06393626800122562, |
| "grad_norm": 0.44084563851356506, |
| "learning_rate": 2.9107321154210068e-06, |
| "loss": 0.5804, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.06396544931934575, |
| "grad_norm": 0.3975321054458618, |
| "learning_rate": 2.9065443013877743e-06, |
| "loss": 0.5792, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.0639946306374659, |
| "grad_norm": 0.4005292057991028, |
| "learning_rate": 2.9023582673513474e-06, |
| "loss": 1.1083, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.06402381195558603, |
| "grad_norm": 0.42678356170654297, |
| "learning_rate": 2.8981740168709716e-06, |
| "loss": 0.6105, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.06405299327370617, |
| "grad_norm": 0.4016914665699005, |
| "learning_rate": 2.893991553504375e-06, |
| "loss": 0.5373, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.06408217459182632, |
| "grad_norm": 0.3963612914085388, |
| "learning_rate": 2.8898108808077725e-06, |
| "loss": 0.5966, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.06411135590994645, |
| "grad_norm": 0.42795488238334656, |
| "learning_rate": 2.8856320023358543e-06, |
| "loss": 1.0722, |
| "step": 2197 |
| }, |
| { |
| "epoch": 0.06414053722806659, |
| "grad_norm": 0.3308316171169281, |
| "learning_rate": 2.8814549216417805e-06, |
| "loss": 1.0366, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.06416971854618674, |
| "grad_norm": 0.3379039764404297, |
| "learning_rate": 2.8772796422771886e-06, |
| "loss": 0.5168, |
| "step": 2199 |
| }, |
| { |
| "epoch": 0.06419889986430687, |
| "grad_norm": 0.423372745513916, |
| "learning_rate": 2.873106167792182e-06, |
| "loss": 0.8584, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.06422808118242701, |
| "grad_norm": 0.5571008920669556, |
| "learning_rate": 2.8689345017353253e-06, |
| "loss": 0.869, |
| "step": 2201 |
| }, |
| { |
| "epoch": 0.06425726250054715, |
| "grad_norm": 0.38183578848838806, |
| "learning_rate": 2.8647646476536535e-06, |
| "loss": 1.3536, |
| "step": 2202 |
| }, |
| { |
| "epoch": 0.06428644381866729, |
| "grad_norm": 0.5668140053749084, |
| "learning_rate": 2.8605966090926574e-06, |
| "loss": 0.7606, |
| "step": 2203 |
| }, |
| { |
| "epoch": 0.06431562513678743, |
| "grad_norm": 0.42183807492256165, |
| "learning_rate": 2.8564303895962797e-06, |
| "loss": 0.4986, |
| "step": 2204 |
| }, |
| { |
| "epoch": 0.06434480645490757, |
| "grad_norm": 0.39345571398735046, |
| "learning_rate": 2.8522659927069253e-06, |
| "loss": 1.0858, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.0643739877730277, |
| "grad_norm": 0.44727692008018494, |
| "learning_rate": 2.8481034219654413e-06, |
| "loss": 1.1909, |
| "step": 2206 |
| }, |
| { |
| "epoch": 0.06440316909114785, |
| "grad_norm": 0.3451339304447174, |
| "learning_rate": 2.8439426809111244e-06, |
| "loss": 0.4234, |
| "step": 2207 |
| }, |
| { |
| "epoch": 0.06443235040926798, |
| "grad_norm": 0.35160118341445923, |
| "learning_rate": 2.839783773081719e-06, |
| "loss": 0.6186, |
| "step": 2208 |
| }, |
| { |
| "epoch": 0.06446153172738812, |
| "grad_norm": 0.36650118231773376, |
| "learning_rate": 2.835626702013406e-06, |
| "loss": 0.6431, |
| "step": 2209 |
| }, |
| { |
| "epoch": 0.06449071304550827, |
| "grad_norm": 0.3791310787200928, |
| "learning_rate": 2.8314714712408075e-06, |
| "loss": 0.7359, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.0645198943636284, |
| "grad_norm": 0.366156667470932, |
| "learning_rate": 2.827318084296983e-06, |
| "loss": 0.5325, |
| "step": 2211 |
| }, |
| { |
| "epoch": 0.06454907568174854, |
| "grad_norm": 0.36698734760284424, |
| "learning_rate": 2.8231665447134183e-06, |
| "loss": 1.0465, |
| "step": 2212 |
| }, |
| { |
| "epoch": 0.06457825699986869, |
| "grad_norm": 0.33683425188064575, |
| "learning_rate": 2.8190168560200317e-06, |
| "loss": 0.4135, |
| "step": 2213 |
| }, |
| { |
| "epoch": 0.06460743831798882, |
| "grad_norm": 0.3639439642429352, |
| "learning_rate": 2.814869021745166e-06, |
| "loss": 0.7474, |
| "step": 2214 |
| }, |
| { |
| "epoch": 0.06463661963610896, |
| "grad_norm": 0.42142772674560547, |
| "learning_rate": 2.810723045415591e-06, |
| "loss": 1.6713, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.06466580095422911, |
| "grad_norm": 0.3934900462627411, |
| "learning_rate": 2.8065789305564955e-06, |
| "loss": 1.0515, |
| "step": 2216 |
| }, |
| { |
| "epoch": 0.06469498227234924, |
| "grad_norm": 0.39165040850639343, |
| "learning_rate": 2.8024366806914822e-06, |
| "loss": 0.9648, |
| "step": 2217 |
| }, |
| { |
| "epoch": 0.06472416359046938, |
| "grad_norm": 0.3803808391094208, |
| "learning_rate": 2.7982962993425733e-06, |
| "loss": 1.0856, |
| "step": 2218 |
| }, |
| { |
| "epoch": 0.06475334490858953, |
| "grad_norm": 0.35796719789505005, |
| "learning_rate": 2.7941577900301974e-06, |
| "loss": 1.0331, |
| "step": 2219 |
| }, |
| { |
| "epoch": 0.06478252622670966, |
| "grad_norm": 0.3499535918235779, |
| "learning_rate": 2.7900211562731923e-06, |
| "loss": 0.4101, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.0648117075448298, |
| "grad_norm": 0.39795583486557007, |
| "learning_rate": 2.785886401588803e-06, |
| "loss": 0.6383, |
| "step": 2221 |
| }, |
| { |
| "epoch": 0.06484088886294993, |
| "grad_norm": 0.3923657238483429, |
| "learning_rate": 2.781753529492679e-06, |
| "loss": 1.0316, |
| "step": 2222 |
| }, |
| { |
| "epoch": 0.06487007018107008, |
| "grad_norm": 0.30667707324028015, |
| "learning_rate": 2.7776225434988618e-06, |
| "loss": 0.3679, |
| "step": 2223 |
| }, |
| { |
| "epoch": 0.06489925149919022, |
| "grad_norm": 0.37963777780532837, |
| "learning_rate": 2.773493447119797e-06, |
| "loss": 0.5842, |
| "step": 2224 |
| }, |
| { |
| "epoch": 0.06492843281731035, |
| "grad_norm": 0.3844693601131439, |
| "learning_rate": 2.7693662438663184e-06, |
| "loss": 1.7158, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.0649576141354305, |
| "grad_norm": 0.45642632246017456, |
| "learning_rate": 2.7652409372476495e-06, |
| "loss": 0.7383, |
| "step": 2226 |
| }, |
| { |
| "epoch": 0.06498679545355064, |
| "grad_norm": 0.3510311245918274, |
| "learning_rate": 2.7611175307714056e-06, |
| "loss": 1.1575, |
| "step": 2227 |
| }, |
| { |
| "epoch": 0.06501597677167077, |
| "grad_norm": 0.44529280066490173, |
| "learning_rate": 2.756996027943586e-06, |
| "loss": 0.6211, |
| "step": 2228 |
| }, |
| { |
| "epoch": 0.06504515808979092, |
| "grad_norm": 0.3660694658756256, |
| "learning_rate": 2.7528764322685658e-06, |
| "loss": 0.7597, |
| "step": 2229 |
| }, |
| { |
| "epoch": 0.06507433940791106, |
| "grad_norm": 0.3575831949710846, |
| "learning_rate": 2.7487587472491063e-06, |
| "loss": 0.6024, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.06510352072603119, |
| "grad_norm": 0.3872736096382141, |
| "learning_rate": 2.7446429763863376e-06, |
| "loss": 0.891, |
| "step": 2231 |
| }, |
| { |
| "epoch": 0.06513270204415134, |
| "grad_norm": 0.5099484920501709, |
| "learning_rate": 2.7405291231797647e-06, |
| "loss": 0.7399, |
| "step": 2232 |
| }, |
| { |
| "epoch": 0.06516188336227148, |
| "grad_norm": 0.38370198011398315, |
| "learning_rate": 2.73641719112726e-06, |
| "loss": 0.6208, |
| "step": 2233 |
| }, |
| { |
| "epoch": 0.06519106468039161, |
| "grad_norm": 0.3455471992492676, |
| "learning_rate": 2.73230718372507e-06, |
| "loss": 1.1602, |
| "step": 2234 |
| }, |
| { |
| "epoch": 0.06522024599851176, |
| "grad_norm": 0.3221728503704071, |
| "learning_rate": 2.728199104467797e-06, |
| "loss": 1.4959, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.06524942731663189, |
| "grad_norm": 0.4023864269256592, |
| "learning_rate": 2.724092956848404e-06, |
| "loss": 0.6504, |
| "step": 2236 |
| }, |
| { |
| "epoch": 0.06527860863475203, |
| "grad_norm": 0.39062392711639404, |
| "learning_rate": 2.719988744358218e-06, |
| "loss": 0.503, |
| "step": 2237 |
| }, |
| { |
| "epoch": 0.06530778995287218, |
| "grad_norm": 0.5177934765815735, |
| "learning_rate": 2.715886470486914e-06, |
| "loss": 1.5162, |
| "step": 2238 |
| }, |
| { |
| "epoch": 0.0653369712709923, |
| "grad_norm": 0.34752145409584045, |
| "learning_rate": 2.7117861387225186e-06, |
| "loss": 0.5226, |
| "step": 2239 |
| }, |
| { |
| "epoch": 0.06536615258911245, |
| "grad_norm": 0.3906431496143341, |
| "learning_rate": 2.7076877525514133e-06, |
| "loss": 1.1531, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.0653953339072326, |
| "grad_norm": 0.65245521068573, |
| "learning_rate": 2.7035913154583217e-06, |
| "loss": 1.3133, |
| "step": 2241 |
| }, |
| { |
| "epoch": 0.06542451522535273, |
| "grad_norm": 0.38677480816841125, |
| "learning_rate": 2.699496830926307e-06, |
| "loss": 1.1464, |
| "step": 2242 |
| }, |
| { |
| "epoch": 0.06545369654347287, |
| "grad_norm": 0.3395407497882843, |
| "learning_rate": 2.695404302436779e-06, |
| "loss": 0.47, |
| "step": 2243 |
| }, |
| { |
| "epoch": 0.06548287786159301, |
| "grad_norm": 0.44159018993377686, |
| "learning_rate": 2.6913137334694796e-06, |
| "loss": 1.1442, |
| "step": 2244 |
| }, |
| { |
| "epoch": 0.06551205917971314, |
| "grad_norm": 0.3402751088142395, |
| "learning_rate": 2.6872251275024825e-06, |
| "loss": 0.5235, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.06554124049783329, |
| "grad_norm": 0.3944838047027588, |
| "learning_rate": 2.6831384880121973e-06, |
| "loss": 0.7692, |
| "step": 2246 |
| }, |
| { |
| "epoch": 0.06557042181595343, |
| "grad_norm": 0.3907717168331146, |
| "learning_rate": 2.679053818473363e-06, |
| "loss": 0.512, |
| "step": 2247 |
| }, |
| { |
| "epoch": 0.06559960313407356, |
| "grad_norm": 0.38240498304367065, |
| "learning_rate": 2.6749711223590347e-06, |
| "loss": 0.5553, |
| "step": 2248 |
| }, |
| { |
| "epoch": 0.06562878445219371, |
| "grad_norm": 0.501410961151123, |
| "learning_rate": 2.670890403140599e-06, |
| "loss": 0.6919, |
| "step": 2249 |
| }, |
| { |
| "epoch": 0.06565796577031384, |
| "grad_norm": 0.4064303934574127, |
| "learning_rate": 2.666811664287758e-06, |
| "loss": 0.693, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.06568714708843398, |
| "grad_norm": 0.39699289202690125, |
| "learning_rate": 2.662734909268526e-06, |
| "loss": 0.609, |
| "step": 2251 |
| }, |
| { |
| "epoch": 0.06571632840655413, |
| "grad_norm": 0.3920840620994568, |
| "learning_rate": 2.6586601415492363e-06, |
| "loss": 1.2953, |
| "step": 2252 |
| }, |
| { |
| "epoch": 0.06574550972467426, |
| "grad_norm": 0.3655044734477997, |
| "learning_rate": 2.654587364594533e-06, |
| "loss": 0.6042, |
| "step": 2253 |
| }, |
| { |
| "epoch": 0.0657746910427944, |
| "grad_norm": 0.41194257140159607, |
| "learning_rate": 2.6505165818673624e-06, |
| "loss": 0.7419, |
| "step": 2254 |
| }, |
| { |
| "epoch": 0.06580387236091455, |
| "grad_norm": 0.36357811093330383, |
| "learning_rate": 2.646447796828976e-06, |
| "loss": 0.6074, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.06583305367903468, |
| "grad_norm": 0.5454866886138916, |
| "learning_rate": 2.6423810129389323e-06, |
| "loss": 1.2095, |
| "step": 2256 |
| }, |
| { |
| "epoch": 0.06586223499715482, |
| "grad_norm": 0.3475554883480072, |
| "learning_rate": 2.6383162336550806e-06, |
| "loss": 0.5402, |
| "step": 2257 |
| }, |
| { |
| "epoch": 0.06589141631527497, |
| "grad_norm": 0.44257158041000366, |
| "learning_rate": 2.6342534624335715e-06, |
| "loss": 0.3333, |
| "step": 2258 |
| }, |
| { |
| "epoch": 0.0659205976333951, |
| "grad_norm": 0.3420020043849945, |
| "learning_rate": 2.6301927027288484e-06, |
| "loss": 0.9797, |
| "step": 2259 |
| }, |
| { |
| "epoch": 0.06594977895151524, |
| "grad_norm": 0.31716400384902954, |
| "learning_rate": 2.6261339579936395e-06, |
| "loss": 0.9979, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.06597896026963537, |
| "grad_norm": 0.36307722330093384, |
| "learning_rate": 2.622077231678962e-06, |
| "loss": 0.3978, |
| "step": 2261 |
| }, |
| { |
| "epoch": 0.06600814158775552, |
| "grad_norm": 0.36089351773262024, |
| "learning_rate": 2.6180225272341197e-06, |
| "loss": 0.6354, |
| "step": 2262 |
| }, |
| { |
| "epoch": 0.06603732290587566, |
| "grad_norm": 0.35729721188545227, |
| "learning_rate": 2.6139698481066946e-06, |
| "loss": 0.5067, |
| "step": 2263 |
| }, |
| { |
| "epoch": 0.06606650422399579, |
| "grad_norm": 0.40199124813079834, |
| "learning_rate": 2.6099191977425432e-06, |
| "loss": 0.777, |
| "step": 2264 |
| }, |
| { |
| "epoch": 0.06609568554211594, |
| "grad_norm": 0.3911895751953125, |
| "learning_rate": 2.6058705795858046e-06, |
| "loss": 1.2911, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.06612486686023608, |
| "grad_norm": 0.37374261021614075, |
| "learning_rate": 2.601823997078887e-06, |
| "loss": 1.1075, |
| "step": 2266 |
| }, |
| { |
| "epoch": 0.06615404817835621, |
| "grad_norm": 0.31036147475242615, |
| "learning_rate": 2.5977794536624634e-06, |
| "loss": 0.5188, |
| "step": 2267 |
| }, |
| { |
| "epoch": 0.06618322949647636, |
| "grad_norm": 0.5836422443389893, |
| "learning_rate": 2.5937369527754807e-06, |
| "loss": 0.968, |
| "step": 2268 |
| }, |
| { |
| "epoch": 0.0662124108145965, |
| "grad_norm": 0.38057446479797363, |
| "learning_rate": 2.5896964978551427e-06, |
| "loss": 0.6429, |
| "step": 2269 |
| }, |
| { |
| "epoch": 0.06624159213271663, |
| "grad_norm": 0.3659692406654358, |
| "learning_rate": 2.5856580923369134e-06, |
| "loss": 0.6627, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.06627077345083678, |
| "grad_norm": 0.4865352213382721, |
| "learning_rate": 2.5816217396545195e-06, |
| "loss": 0.6002, |
| "step": 2271 |
| }, |
| { |
| "epoch": 0.06629995476895692, |
| "grad_norm": 0.3877970576286316, |
| "learning_rate": 2.5775874432399415e-06, |
| "loss": 0.6113, |
| "step": 2272 |
| }, |
| { |
| "epoch": 0.06632913608707705, |
| "grad_norm": 0.35497722029685974, |
| "learning_rate": 2.5735552065234055e-06, |
| "loss": 0.5408, |
| "step": 2273 |
| }, |
| { |
| "epoch": 0.0663583174051972, |
| "grad_norm": 0.37536606192588806, |
| "learning_rate": 2.5695250329333933e-06, |
| "loss": 0.4918, |
| "step": 2274 |
| }, |
| { |
| "epoch": 0.06638749872331733, |
| "grad_norm": 0.3913287818431854, |
| "learning_rate": 2.565496925896629e-06, |
| "loss": 0.5933, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.06641668004143747, |
| "grad_norm": 0.43948641419410706, |
| "learning_rate": 2.5614708888380767e-06, |
| "loss": 0.5911, |
| "step": 2276 |
| }, |
| { |
| "epoch": 0.06644586135955761, |
| "grad_norm": 0.3500136733055115, |
| "learning_rate": 2.5574469251809463e-06, |
| "loss": 1.0668, |
| "step": 2277 |
| }, |
| { |
| "epoch": 0.06647504267767775, |
| "grad_norm": 0.5272991061210632, |
| "learning_rate": 2.553425038346684e-06, |
| "loss": 1.3124, |
| "step": 2278 |
| }, |
| { |
| "epoch": 0.06650422399579789, |
| "grad_norm": 0.3930307626724243, |
| "learning_rate": 2.549405231754968e-06, |
| "loss": 0.6088, |
| "step": 2279 |
| }, |
| { |
| "epoch": 0.06653340531391803, |
| "grad_norm": 0.3676909804344177, |
| "learning_rate": 2.545387508823704e-06, |
| "loss": 1.2554, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.06656258663203816, |
| "grad_norm": 0.38661840558052063, |
| "learning_rate": 2.5413718729690353e-06, |
| "loss": 1.2363, |
| "step": 2281 |
| }, |
| { |
| "epoch": 0.06659176795015831, |
| "grad_norm": 0.36813339591026306, |
| "learning_rate": 2.5373583276053217e-06, |
| "loss": 0.4441, |
| "step": 2282 |
| }, |
| { |
| "epoch": 0.06662094926827845, |
| "grad_norm": 0.37831440567970276, |
| "learning_rate": 2.5333468761451507e-06, |
| "loss": 0.5864, |
| "step": 2283 |
| }, |
| { |
| "epoch": 0.06665013058639858, |
| "grad_norm": 0.37219545245170593, |
| "learning_rate": 2.5293375219993304e-06, |
| "loss": 1.0913, |
| "step": 2284 |
| }, |
| { |
| "epoch": 0.06667931190451873, |
| "grad_norm": 0.5311377644538879, |
| "learning_rate": 2.525330268576881e-06, |
| "loss": 0.9739, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.06670849322263887, |
| "grad_norm": 0.3427966833114624, |
| "learning_rate": 2.5213251192850372e-06, |
| "loss": 0.7041, |
| "step": 2286 |
| }, |
| { |
| "epoch": 0.066737674540759, |
| "grad_norm": 0.5137971043586731, |
| "learning_rate": 2.5173220775292507e-06, |
| "loss": 1.2092, |
| "step": 2287 |
| }, |
| { |
| "epoch": 0.06676685585887915, |
| "grad_norm": 0.4096526801586151, |
| "learning_rate": 2.5133211467131725e-06, |
| "loss": 0.645, |
| "step": 2288 |
| }, |
| { |
| "epoch": 0.06679603717699928, |
| "grad_norm": 0.41095679998397827, |
| "learning_rate": 2.509322330238664e-06, |
| "loss": 0.9699, |
| "step": 2289 |
| }, |
| { |
| "epoch": 0.06682521849511942, |
| "grad_norm": 0.49337074160575867, |
| "learning_rate": 2.5053256315057904e-06, |
| "loss": 0.4812, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.06685439981323957, |
| "grad_norm": 0.42444032430648804, |
| "learning_rate": 2.5013310539128122e-06, |
| "loss": 0.5461, |
| "step": 2291 |
| }, |
| { |
| "epoch": 0.0668835811313597, |
| "grad_norm": 0.3968393802642822, |
| "learning_rate": 2.497338600856185e-06, |
| "loss": 0.9629, |
| "step": 2292 |
| }, |
| { |
| "epoch": 0.06691276244947984, |
| "grad_norm": 0.3901437520980835, |
| "learning_rate": 2.493348275730566e-06, |
| "loss": 0.5674, |
| "step": 2293 |
| }, |
| { |
| "epoch": 0.06694194376759999, |
| "grad_norm": 0.6049697995185852, |
| "learning_rate": 2.4893600819287955e-06, |
| "loss": 1.7119, |
| "step": 2294 |
| }, |
| { |
| "epoch": 0.06697112508572012, |
| "grad_norm": 0.3915485143661499, |
| "learning_rate": 2.4853740228419016e-06, |
| "loss": 0.5524, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.06700030640384026, |
| "grad_norm": 0.44735613465309143, |
| "learning_rate": 2.481390101859102e-06, |
| "loss": 0.6258, |
| "step": 2296 |
| }, |
| { |
| "epoch": 0.0670294877219604, |
| "grad_norm": 0.7665141820907593, |
| "learning_rate": 2.4774083223677962e-06, |
| "loss": 0.6041, |
| "step": 2297 |
| }, |
| { |
| "epoch": 0.06705866904008054, |
| "grad_norm": 0.3770742416381836, |
| "learning_rate": 2.473428687753559e-06, |
| "loss": 0.4378, |
| "step": 2298 |
| }, |
| { |
| "epoch": 0.06708785035820068, |
| "grad_norm": 0.3503175377845764, |
| "learning_rate": 2.4694512014001415e-06, |
| "loss": 0.4914, |
| "step": 2299 |
| }, |
| { |
| "epoch": 0.06711703167632083, |
| "grad_norm": 0.39997512102127075, |
| "learning_rate": 2.4654758666894737e-06, |
| "loss": 0.7328, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.06714621299444096, |
| "grad_norm": 0.40389999747276306, |
| "learning_rate": 2.4615026870016485e-06, |
| "loss": 1.1988, |
| "step": 2301 |
| }, |
| { |
| "epoch": 0.0671753943125611, |
| "grad_norm": 0.3892918825149536, |
| "learning_rate": 2.457531665714933e-06, |
| "loss": 0.5576, |
| "step": 2302 |
| }, |
| { |
| "epoch": 0.06720457563068123, |
| "grad_norm": 0.42479056119918823, |
| "learning_rate": 2.4535628062057575e-06, |
| "loss": 0.8021, |
| "step": 2303 |
| }, |
| { |
| "epoch": 0.06723375694880138, |
| "grad_norm": 0.3095262050628662, |
| "learning_rate": 2.4495961118487115e-06, |
| "loss": 1.083, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.06726293826692152, |
| "grad_norm": 0.3844510018825531, |
| "learning_rate": 2.445631586016544e-06, |
| "loss": 0.5071, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.06729211958504165, |
| "grad_norm": 0.35495898127555847, |
| "learning_rate": 2.4416692320801654e-06, |
| "loss": 0.6278, |
| "step": 2306 |
| }, |
| { |
| "epoch": 0.0673213009031618, |
| "grad_norm": 0.31097471714019775, |
| "learning_rate": 2.437709053408631e-06, |
| "loss": 0.3857, |
| "step": 2307 |
| }, |
| { |
| "epoch": 0.06735048222128194, |
| "grad_norm": 0.469840943813324, |
| "learning_rate": 2.4337510533691527e-06, |
| "loss": 1.0183, |
| "step": 2308 |
| }, |
| { |
| "epoch": 0.06737966353940207, |
| "grad_norm": 0.38367390632629395, |
| "learning_rate": 2.4297952353270904e-06, |
| "loss": 0.5347, |
| "step": 2309 |
| }, |
| { |
| "epoch": 0.06740884485752222, |
| "grad_norm": 0.5822371244430542, |
| "learning_rate": 2.4258416026459453e-06, |
| "loss": 0.8687, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.06743802617564236, |
| "grad_norm": 0.352183073759079, |
| "learning_rate": 2.421890158687359e-06, |
| "loss": 0.5031, |
| "step": 2311 |
| }, |
| { |
| "epoch": 0.06746720749376249, |
| "grad_norm": 0.36535561084747314, |
| "learning_rate": 2.4179409068111193e-06, |
| "loss": 0.992, |
| "step": 2312 |
| }, |
| { |
| "epoch": 0.06749638881188263, |
| "grad_norm": 0.3759225606918335, |
| "learning_rate": 2.4139938503751416e-06, |
| "loss": 0.9736, |
| "step": 2313 |
| }, |
| { |
| "epoch": 0.06752557013000277, |
| "grad_norm": 0.37635865807533264, |
| "learning_rate": 2.4100489927354805e-06, |
| "loss": 0.505, |
| "step": 2314 |
| }, |
| { |
| "epoch": 0.06755475144812291, |
| "grad_norm": 0.347231388092041, |
| "learning_rate": 2.406106337246321e-06, |
| "loss": 1.0841, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.06758393276624305, |
| "grad_norm": 0.3201952576637268, |
| "learning_rate": 2.4021658872599727e-06, |
| "loss": 0.4921, |
| "step": 2316 |
| }, |
| { |
| "epoch": 0.06761311408436319, |
| "grad_norm": 0.4974895119667053, |
| "learning_rate": 2.3982276461268706e-06, |
| "loss": 1.7955, |
| "step": 2317 |
| }, |
| { |
| "epoch": 0.06764229540248333, |
| "grad_norm": 0.367649108171463, |
| "learning_rate": 2.3942916171955707e-06, |
| "loss": 0.5996, |
| "step": 2318 |
| }, |
| { |
| "epoch": 0.06767147672060347, |
| "grad_norm": 0.35782673954963684, |
| "learning_rate": 2.3903578038127505e-06, |
| "loss": 1.0778, |
| "step": 2319 |
| }, |
| { |
| "epoch": 0.0677006580387236, |
| "grad_norm": 0.544367790222168, |
| "learning_rate": 2.3864262093232056e-06, |
| "loss": 0.7105, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.06772983935684375, |
| "grad_norm": 0.38884636759757996, |
| "learning_rate": 2.382496837069837e-06, |
| "loss": 0.7438, |
| "step": 2321 |
| }, |
| { |
| "epoch": 0.0677590206749639, |
| "grad_norm": 0.3445921540260315, |
| "learning_rate": 2.3785696903936657e-06, |
| "loss": 0.8839, |
| "step": 2322 |
| }, |
| { |
| "epoch": 0.06778820199308402, |
| "grad_norm": 0.3551780879497528, |
| "learning_rate": 2.3746447726338147e-06, |
| "loss": 0.9396, |
| "step": 2323 |
| }, |
| { |
| "epoch": 0.06781738331120417, |
| "grad_norm": 0.3849031329154968, |
| "learning_rate": 2.3707220871275096e-06, |
| "loss": 0.8683, |
| "step": 2324 |
| }, |
| { |
| "epoch": 0.06784656462932431, |
| "grad_norm": 0.4329202473163605, |
| "learning_rate": 2.366801637210086e-06, |
| "loss": 0.8645, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.06787574594744444, |
| "grad_norm": 0.37595221400260925, |
| "learning_rate": 2.3628834262149703e-06, |
| "loss": 0.6043, |
| "step": 2326 |
| }, |
| { |
| "epoch": 0.06790492726556459, |
| "grad_norm": 0.4238578677177429, |
| "learning_rate": 2.358967457473691e-06, |
| "loss": 1.0188, |
| "step": 2327 |
| }, |
| { |
| "epoch": 0.06793410858368472, |
| "grad_norm": 0.3212074041366577, |
| "learning_rate": 2.355053734315869e-06, |
| "loss": 0.4843, |
| "step": 2328 |
| }, |
| { |
| "epoch": 0.06796328990180486, |
| "grad_norm": 0.34448492527008057, |
| "learning_rate": 2.3511422600692143e-06, |
| "loss": 0.5145, |
| "step": 2329 |
| }, |
| { |
| "epoch": 0.06799247121992501, |
| "grad_norm": 0.3330211341381073, |
| "learning_rate": 2.3472330380595226e-06, |
| "loss": 0.6125, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.06802165253804514, |
| "grad_norm": 0.37217503786087036, |
| "learning_rate": 2.3433260716106814e-06, |
| "loss": 0.5304, |
| "step": 2331 |
| }, |
| { |
| "epoch": 0.06805083385616528, |
| "grad_norm": 0.3429581820964813, |
| "learning_rate": 2.339421364044652e-06, |
| "loss": 0.4732, |
| "step": 2332 |
| }, |
| { |
| "epoch": 0.06808001517428543, |
| "grad_norm": 0.4218696653842926, |
| "learning_rate": 2.3355189186814818e-06, |
| "loss": 1.154, |
| "step": 2333 |
| }, |
| { |
| "epoch": 0.06810919649240556, |
| "grad_norm": 0.4170651435852051, |
| "learning_rate": 2.3316187388392943e-06, |
| "loss": 1.0605, |
| "step": 2334 |
| }, |
| { |
| "epoch": 0.0681383778105257, |
| "grad_norm": 0.38038069009780884, |
| "learning_rate": 2.327720827834282e-06, |
| "loss": 0.8296, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.06816755912864585, |
| "grad_norm": 0.3588862717151642, |
| "learning_rate": 2.3238251889807096e-06, |
| "loss": 0.5871, |
| "step": 2336 |
| }, |
| { |
| "epoch": 0.06819674044676598, |
| "grad_norm": 0.41848042607307434, |
| "learning_rate": 2.319931825590915e-06, |
| "loss": 0.7, |
| "step": 2337 |
| }, |
| { |
| "epoch": 0.06822592176488612, |
| "grad_norm": 0.35904598236083984, |
| "learning_rate": 2.316040740975294e-06, |
| "loss": 0.7135, |
| "step": 2338 |
| }, |
| { |
| "epoch": 0.06825510308300627, |
| "grad_norm": 0.34999504685401917, |
| "learning_rate": 2.312151938442311e-06, |
| "loss": 0.519, |
| "step": 2339 |
| }, |
| { |
| "epoch": 0.0682842844011264, |
| "grad_norm": 0.32720765471458435, |
| "learning_rate": 2.3082654212984845e-06, |
| "loss": 0.4237, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.06831346571924654, |
| "grad_norm": 0.350627601146698, |
| "learning_rate": 2.3043811928483965e-06, |
| "loss": 0.6813, |
| "step": 2341 |
| }, |
| { |
| "epoch": 0.06834264703736667, |
| "grad_norm": 0.4011843800544739, |
| "learning_rate": 2.3004992563946776e-06, |
| "loss": 0.6361, |
| "step": 2342 |
| }, |
| { |
| "epoch": 0.06837182835548682, |
| "grad_norm": 0.3468932509422302, |
| "learning_rate": 2.296619615238009e-06, |
| "loss": 0.3576, |
| "step": 2343 |
| }, |
| { |
| "epoch": 0.06840100967360696, |
| "grad_norm": 0.4590185582637787, |
| "learning_rate": 2.292742272677124e-06, |
| "loss": 0.8496, |
| "step": 2344 |
| }, |
| { |
| "epoch": 0.06843019099172709, |
| "grad_norm": 0.5163765549659729, |
| "learning_rate": 2.2888672320088023e-06, |
| "loss": 1.0052, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.06845937230984724, |
| "grad_norm": 0.4050164520740509, |
| "learning_rate": 2.2849944965278604e-06, |
| "loss": 0.5167, |
| "step": 2346 |
| }, |
| { |
| "epoch": 0.06848855362796738, |
| "grad_norm": 0.42852064967155457, |
| "learning_rate": 2.281124069527162e-06, |
| "loss": 1.7452, |
| "step": 2347 |
| }, |
| { |
| "epoch": 0.06851773494608751, |
| "grad_norm": 0.4135974645614624, |
| "learning_rate": 2.277255954297602e-06, |
| "loss": 1.1251, |
| "step": 2348 |
| }, |
| { |
| "epoch": 0.06854691626420766, |
| "grad_norm": 0.4409804046154022, |
| "learning_rate": 2.2733901541281105e-06, |
| "loss": 0.732, |
| "step": 2349 |
| }, |
| { |
| "epoch": 0.0685760975823278, |
| "grad_norm": 0.37079402804374695, |
| "learning_rate": 2.2695266723056555e-06, |
| "loss": 1.0499, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.06860527890044793, |
| "grad_norm": 0.36414167284965515, |
| "learning_rate": 2.265665512115223e-06, |
| "loss": 0.5811, |
| "step": 2351 |
| }, |
| { |
| "epoch": 0.06863446021856807, |
| "grad_norm": 0.4887774586677551, |
| "learning_rate": 2.2618066768398355e-06, |
| "loss": 1.1393, |
| "step": 2352 |
| }, |
| { |
| "epoch": 0.06866364153668822, |
| "grad_norm": 0.39137986302375793, |
| "learning_rate": 2.2579501697605346e-06, |
| "loss": 0.4161, |
| "step": 2353 |
| }, |
| { |
| "epoch": 0.06869282285480835, |
| "grad_norm": 0.4010322690010071, |
| "learning_rate": 2.2540959941563807e-06, |
| "loss": 0.6601, |
| "step": 2354 |
| }, |
| { |
| "epoch": 0.0687220041729285, |
| "grad_norm": 0.39666450023651123, |
| "learning_rate": 2.2502441533044504e-06, |
| "loss": 0.6234, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.06875118549104862, |
| "grad_norm": 0.34849557280540466, |
| "learning_rate": 2.246394650479842e-06, |
| "loss": 0.5125, |
| "step": 2356 |
| }, |
| { |
| "epoch": 0.06878036680916877, |
| "grad_norm": 0.3707070052623749, |
| "learning_rate": 2.242547488955658e-06, |
| "loss": 0.4645, |
| "step": 2357 |
| }, |
| { |
| "epoch": 0.06880954812728891, |
| "grad_norm": 0.37178662419319153, |
| "learning_rate": 2.2387026720030157e-06, |
| "loss": 1.0765, |
| "step": 2358 |
| }, |
| { |
| "epoch": 0.06883872944540904, |
| "grad_norm": 0.38042861223220825, |
| "learning_rate": 2.2348602028910387e-06, |
| "loss": 0.6082, |
| "step": 2359 |
| }, |
| { |
| "epoch": 0.06886791076352919, |
| "grad_norm": 0.48389261960983276, |
| "learning_rate": 2.2310200848868515e-06, |
| "loss": 0.5891, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.06889709208164933, |
| "grad_norm": 0.4756917357444763, |
| "learning_rate": 2.2271823212555792e-06, |
| "loss": 1.1732, |
| "step": 2361 |
| }, |
| { |
| "epoch": 0.06892627339976946, |
| "grad_norm": 0.4094328284263611, |
| "learning_rate": 2.2233469152603453e-06, |
| "loss": 0.5288, |
| "step": 2362 |
| }, |
| { |
| "epoch": 0.06895545471788961, |
| "grad_norm": 0.45187872648239136, |
| "learning_rate": 2.2195138701622727e-06, |
| "loss": 0.9786, |
| "step": 2363 |
| }, |
| { |
| "epoch": 0.06898463603600975, |
| "grad_norm": 0.3738536834716797, |
| "learning_rate": 2.2156831892204743e-06, |
| "loss": 1.1804, |
| "step": 2364 |
| }, |
| { |
| "epoch": 0.06901381735412988, |
| "grad_norm": 0.3664667010307312, |
| "learning_rate": 2.21185487569205e-06, |
| "loss": 1.0862, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.06904299867225003, |
| "grad_norm": 0.4061928987503052, |
| "learning_rate": 2.208028932832093e-06, |
| "loss": 0.66, |
| "step": 2366 |
| }, |
| { |
| "epoch": 0.06907217999037016, |
| "grad_norm": 0.3443855047225952, |
| "learning_rate": 2.204205363893675e-06, |
| "loss": 0.6017, |
| "step": 2367 |
| }, |
| { |
| "epoch": 0.0691013613084903, |
| "grad_norm": 0.38931167125701904, |
| "learning_rate": 2.2003841721278498e-06, |
| "loss": 0.6349, |
| "step": 2368 |
| }, |
| { |
| "epoch": 0.06913054262661045, |
| "grad_norm": 0.37855663895606995, |
| "learning_rate": 2.196565360783653e-06, |
| "loss": 0.7399, |
| "step": 2369 |
| }, |
| { |
| "epoch": 0.06915972394473058, |
| "grad_norm": 0.39310422539711, |
| "learning_rate": 2.192748933108097e-06, |
| "loss": 0.7429, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.06918890526285072, |
| "grad_norm": 0.40750426054000854, |
| "learning_rate": 2.188934892346162e-06, |
| "loss": 0.6942, |
| "step": 2371 |
| }, |
| { |
| "epoch": 0.06921808658097087, |
| "grad_norm": 0.3550734221935272, |
| "learning_rate": 2.185123241740804e-06, |
| "loss": 0.7248, |
| "step": 2372 |
| }, |
| { |
| "epoch": 0.069247267899091, |
| "grad_norm": 0.42732638120651245, |
| "learning_rate": 2.181313984532945e-06, |
| "loss": 0.735, |
| "step": 2373 |
| }, |
| { |
| "epoch": 0.06927644921721114, |
| "grad_norm": 0.32182979583740234, |
| "learning_rate": 2.177507123961468e-06, |
| "loss": 0.7875, |
| "step": 2374 |
| }, |
| { |
| "epoch": 0.06930563053533129, |
| "grad_norm": 0.38181596994400024, |
| "learning_rate": 2.1737026632632253e-06, |
| "loss": 0.7326, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.06933481185345142, |
| "grad_norm": 0.36936119198799133, |
| "learning_rate": 2.1699006056730254e-06, |
| "loss": 0.748, |
| "step": 2376 |
| }, |
| { |
| "epoch": 0.06936399317157156, |
| "grad_norm": 0.3905118703842163, |
| "learning_rate": 2.1661009544236306e-06, |
| "loss": 0.4721, |
| "step": 2377 |
| }, |
| { |
| "epoch": 0.0693931744896917, |
| "grad_norm": 0.4434422552585602, |
| "learning_rate": 2.162303712745763e-06, |
| "loss": 0.7692, |
| "step": 2378 |
| }, |
| { |
| "epoch": 0.06942235580781184, |
| "grad_norm": 0.37523123621940613, |
| "learning_rate": 2.1585088838680918e-06, |
| "loss": 0.6387, |
| "step": 2379 |
| }, |
| { |
| "epoch": 0.06945153712593198, |
| "grad_norm": 0.5466306805610657, |
| "learning_rate": 2.1547164710172355e-06, |
| "loss": 1.0577, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.06948071844405211, |
| "grad_norm": 0.5337136387825012, |
| "learning_rate": 2.150926477417755e-06, |
| "loss": 0.7623, |
| "step": 2381 |
| }, |
| { |
| "epoch": 0.06950989976217226, |
| "grad_norm": 0.3621442914009094, |
| "learning_rate": 2.147138906292162e-06, |
| "loss": 0.6123, |
| "step": 2382 |
| }, |
| { |
| "epoch": 0.0695390810802924, |
| "grad_norm": 0.3321429193019867, |
| "learning_rate": 2.1433537608609045e-06, |
| "loss": 0.4981, |
| "step": 2383 |
| }, |
| { |
| "epoch": 0.06956826239841253, |
| "grad_norm": 0.4353511333465576, |
| "learning_rate": 2.139571044342365e-06, |
| "loss": 0.6948, |
| "step": 2384 |
| }, |
| { |
| "epoch": 0.06959744371653268, |
| "grad_norm": 0.4095340967178345, |
| "learning_rate": 2.135790759952866e-06, |
| "loss": 0.6777, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.06962662503465282, |
| "grad_norm": 0.35531842708587646, |
| "learning_rate": 2.132012910906658e-06, |
| "loss": 0.5955, |
| "step": 2386 |
| }, |
| { |
| "epoch": 0.06965580635277295, |
| "grad_norm": 0.39012444019317627, |
| "learning_rate": 2.128237500415921e-06, |
| "loss": 0.5671, |
| "step": 2387 |
| }, |
| { |
| "epoch": 0.0696849876708931, |
| "grad_norm": 0.3363099992275238, |
| "learning_rate": 2.124464531690764e-06, |
| "loss": 0.4879, |
| "step": 2388 |
| }, |
| { |
| "epoch": 0.06971416898901324, |
| "grad_norm": 0.34853294491767883, |
| "learning_rate": 2.120694007939221e-06, |
| "loss": 0.397, |
| "step": 2389 |
| }, |
| { |
| "epoch": 0.06974335030713337, |
| "grad_norm": 0.3768669068813324, |
| "learning_rate": 2.116925932367241e-06, |
| "loss": 0.5866, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.06977253162525351, |
| "grad_norm": 0.3497234284877777, |
| "learning_rate": 2.113160308178699e-06, |
| "loss": 0.5854, |
| "step": 2391 |
| }, |
| { |
| "epoch": 0.06980171294337366, |
| "grad_norm": 0.3704819083213806, |
| "learning_rate": 2.10939713857538e-06, |
| "loss": 1.571, |
| "step": 2392 |
| }, |
| { |
| "epoch": 0.06983089426149379, |
| "grad_norm": 0.3682405948638916, |
| "learning_rate": 2.105636426756982e-06, |
| "loss": 0.7302, |
| "step": 2393 |
| }, |
| { |
| "epoch": 0.06986007557961393, |
| "grad_norm": 0.37819990515708923, |
| "learning_rate": 2.1018781759211164e-06, |
| "loss": 0.4996, |
| "step": 2394 |
| }, |
| { |
| "epoch": 0.06988925689773406, |
| "grad_norm": 0.4203060567378998, |
| "learning_rate": 2.0981223892633037e-06, |
| "loss": 1.3888, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.06991843821585421, |
| "grad_norm": 0.4256092607975006, |
| "learning_rate": 2.0943690699769616e-06, |
| "loss": 0.966, |
| "step": 2396 |
| }, |
| { |
| "epoch": 0.06994761953397435, |
| "grad_norm": 0.36608976125717163, |
| "learning_rate": 2.0906182212534186e-06, |
| "loss": 0.5845, |
| "step": 2397 |
| }, |
| { |
| "epoch": 0.06997680085209448, |
| "grad_norm": 0.43183717131614685, |
| "learning_rate": 2.0868698462818963e-06, |
| "loss": 1.6639, |
| "step": 2398 |
| }, |
| { |
| "epoch": 0.07000598217021463, |
| "grad_norm": 0.4833545982837677, |
| "learning_rate": 2.083123948249514e-06, |
| "loss": 0.7423, |
| "step": 2399 |
| }, |
| { |
| "epoch": 0.07003516348833477, |
| "grad_norm": 0.3179115056991577, |
| "learning_rate": 2.079380530341288e-06, |
| "loss": 0.4304, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.0700643448064549, |
| "grad_norm": 0.40356066823005676, |
| "learning_rate": 2.075639595740125e-06, |
| "loss": 0.5732, |
| "step": 2401 |
| }, |
| { |
| "epoch": 0.07009352612457505, |
| "grad_norm": 0.3708016574382782, |
| "learning_rate": 2.0719011476268176e-06, |
| "loss": 0.5915, |
| "step": 2402 |
| }, |
| { |
| "epoch": 0.07012270744269519, |
| "grad_norm": 0.42920756340026855, |
| "learning_rate": 2.0681651891800443e-06, |
| "loss": 0.655, |
| "step": 2403 |
| }, |
| { |
| "epoch": 0.07015188876081532, |
| "grad_norm": 0.36631548404693604, |
| "learning_rate": 2.0644317235763713e-06, |
| "loss": 0.5833, |
| "step": 2404 |
| }, |
| { |
| "epoch": 0.07018107007893547, |
| "grad_norm": 0.5512979030609131, |
| "learning_rate": 2.0607007539902384e-06, |
| "loss": 1.2377, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.07021025139705561, |
| "grad_norm": 0.38115638494491577, |
| "learning_rate": 2.0569722835939695e-06, |
| "loss": 0.5108, |
| "step": 2406 |
| }, |
| { |
| "epoch": 0.07023943271517574, |
| "grad_norm": 0.4353134036064148, |
| "learning_rate": 2.053246315557762e-06, |
| "loss": 0.6421, |
| "step": 2407 |
| }, |
| { |
| "epoch": 0.07026861403329589, |
| "grad_norm": 0.36266690492630005, |
| "learning_rate": 2.0495228530496825e-06, |
| "loss": 0.6908, |
| "step": 2408 |
| }, |
| { |
| "epoch": 0.07029779535141602, |
| "grad_norm": 0.3586747944355011, |
| "learning_rate": 2.0458018992356676e-06, |
| "loss": 0.4107, |
| "step": 2409 |
| }, |
| { |
| "epoch": 0.07032697666953616, |
| "grad_norm": 0.37562692165374756, |
| "learning_rate": 2.0420834572795263e-06, |
| "loss": 1.7169, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.0703561579876563, |
| "grad_norm": 0.4182543158531189, |
| "learning_rate": 2.0383675303429255e-06, |
| "loss": 0.8079, |
| "step": 2411 |
| }, |
| { |
| "epoch": 0.07038533930577644, |
| "grad_norm": 0.4163798987865448, |
| "learning_rate": 2.0346541215853947e-06, |
| "loss": 0.5742, |
| "step": 2412 |
| }, |
| { |
| "epoch": 0.07041452062389658, |
| "grad_norm": 0.37079402804374695, |
| "learning_rate": 2.0309432341643263e-06, |
| "loss": 0.5929, |
| "step": 2413 |
| }, |
| { |
| "epoch": 0.07044370194201673, |
| "grad_norm": 0.35390859842300415, |
| "learning_rate": 2.027234871234967e-06, |
| "loss": 0.6542, |
| "step": 2414 |
| }, |
| { |
| "epoch": 0.07047288326013686, |
| "grad_norm": 0.4579854905605316, |
| "learning_rate": 2.0235290359504135e-06, |
| "loss": 0.9622, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.070502064578257, |
| "grad_norm": 0.31149613857269287, |
| "learning_rate": 2.0198257314616194e-06, |
| "loss": 0.4665, |
| "step": 2416 |
| }, |
| { |
| "epoch": 0.07053124589637715, |
| "grad_norm": 0.34125059843063354, |
| "learning_rate": 2.016124960917381e-06, |
| "loss": 0.4284, |
| "step": 2417 |
| }, |
| { |
| "epoch": 0.07056042721449728, |
| "grad_norm": 0.6271713972091675, |
| "learning_rate": 2.012426727464341e-06, |
| "loss": 0.6522, |
| "step": 2418 |
| }, |
| { |
| "epoch": 0.07058960853261742, |
| "grad_norm": 0.40172070264816284, |
| "learning_rate": 2.008731034246988e-06, |
| "loss": 0.4762, |
| "step": 2419 |
| }, |
| { |
| "epoch": 0.07061878985073755, |
| "grad_norm": 0.37926357984542847, |
| "learning_rate": 2.0050378844076508e-06, |
| "loss": 0.5313, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.0706479711688577, |
| "grad_norm": 0.3509278893470764, |
| "learning_rate": 2.0013472810864894e-06, |
| "loss": 1.0851, |
| "step": 2421 |
| }, |
| { |
| "epoch": 0.07067715248697784, |
| "grad_norm": 0.376741886138916, |
| "learning_rate": 1.997659227421507e-06, |
| "loss": 0.9868, |
| "step": 2422 |
| }, |
| { |
| "epoch": 0.07070633380509797, |
| "grad_norm": 0.38616496324539185, |
| "learning_rate": 1.9939737265485327e-06, |
| "loss": 0.6012, |
| "step": 2423 |
| }, |
| { |
| "epoch": 0.07073551512321812, |
| "grad_norm": 0.47480812668800354, |
| "learning_rate": 1.9902907816012263e-06, |
| "loss": 0.8162, |
| "step": 2424 |
| }, |
| { |
| "epoch": 0.07076469644133826, |
| "grad_norm": 0.35912272334098816, |
| "learning_rate": 1.9866103957110756e-06, |
| "loss": 0.5043, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.07079387775945839, |
| "grad_norm": 0.3571290373802185, |
| "learning_rate": 1.982932572007395e-06, |
| "loss": 0.6168, |
| "step": 2426 |
| }, |
| { |
| "epoch": 0.07082305907757853, |
| "grad_norm": 0.38874393701553345, |
| "learning_rate": 1.9792573136173154e-06, |
| "loss": 0.6684, |
| "step": 2427 |
| }, |
| { |
| "epoch": 0.07085224039569868, |
| "grad_norm": 0.35545557737350464, |
| "learning_rate": 1.9755846236657874e-06, |
| "loss": 0.5503, |
| "step": 2428 |
| }, |
| { |
| "epoch": 0.07088142171381881, |
| "grad_norm": 0.3780122399330139, |
| "learning_rate": 1.971914505275581e-06, |
| "loss": 0.9609, |
| "step": 2429 |
| }, |
| { |
| "epoch": 0.07091060303193895, |
| "grad_norm": 0.37414827942848206, |
| "learning_rate": 1.9682469615672762e-06, |
| "loss": 0.635, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.0709397843500591, |
| "grad_norm": 0.40396106243133545, |
| "learning_rate": 1.964581995659265e-06, |
| "loss": 0.5456, |
| "step": 2431 |
| }, |
| { |
| "epoch": 0.07096896566817923, |
| "grad_norm": 0.3976384401321411, |
| "learning_rate": 1.9609196106677507e-06, |
| "loss": 0.6777, |
| "step": 2432 |
| }, |
| { |
| "epoch": 0.07099814698629937, |
| "grad_norm": 0.3728221654891968, |
| "learning_rate": 1.957259809706738e-06, |
| "loss": 0.5937, |
| "step": 2433 |
| }, |
| { |
| "epoch": 0.0710273283044195, |
| "grad_norm": 0.376641184091568, |
| "learning_rate": 1.9536025958880327e-06, |
| "loss": 0.6591, |
| "step": 2434 |
| }, |
| { |
| "epoch": 0.07105650962253965, |
| "grad_norm": 0.3486475646495819, |
| "learning_rate": 1.949947972321249e-06, |
| "loss": 0.5848, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.0710856909406598, |
| "grad_norm": 0.37088072299957275, |
| "learning_rate": 1.94629594211379e-06, |
| "loss": 1.1408, |
| "step": 2436 |
| }, |
| { |
| "epoch": 0.07111487225877992, |
| "grad_norm": 0.40795472264289856, |
| "learning_rate": 1.942646508370855e-06, |
| "loss": 0.6293, |
| "step": 2437 |
| }, |
| { |
| "epoch": 0.07114405357690007, |
| "grad_norm": 0.4086592197418213, |
| "learning_rate": 1.9389996741954447e-06, |
| "loss": 1.3289, |
| "step": 2438 |
| }, |
| { |
| "epoch": 0.07117323489502021, |
| "grad_norm": 0.33407464623451233, |
| "learning_rate": 1.9353554426883397e-06, |
| "loss": 0.5866, |
| "step": 2439 |
| }, |
| { |
| "epoch": 0.07120241621314034, |
| "grad_norm": 0.3634355962276459, |
| "learning_rate": 1.931713816948108e-06, |
| "loss": 0.67, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.07123159753126049, |
| "grad_norm": 0.3512408137321472, |
| "learning_rate": 1.928074800071109e-06, |
| "loss": 0.4609, |
| "step": 2441 |
| }, |
| { |
| "epoch": 0.07126077884938063, |
| "grad_norm": 0.3987075984477997, |
| "learning_rate": 1.9244383951514776e-06, |
| "loss": 0.5634, |
| "step": 2442 |
| }, |
| { |
| "epoch": 0.07128996016750076, |
| "grad_norm": 0.3397349715232849, |
| "learning_rate": 1.920804605281128e-06, |
| "loss": 0.5324, |
| "step": 2443 |
| }, |
| { |
| "epoch": 0.07131914148562091, |
| "grad_norm": 0.3303639888763428, |
| "learning_rate": 1.9171734335497546e-06, |
| "loss": 0.6703, |
| "step": 2444 |
| }, |
| { |
| "epoch": 0.07134832280374105, |
| "grad_norm": 0.35340216755867004, |
| "learning_rate": 1.9135448830448244e-06, |
| "loss": 0.722, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.07137750412186118, |
| "grad_norm": 0.46648624539375305, |
| "learning_rate": 1.9099189568515746e-06, |
| "loss": 0.6783, |
| "step": 2446 |
| }, |
| { |
| "epoch": 0.07140668543998133, |
| "grad_norm": 0.3888293504714966, |
| "learning_rate": 1.9062956580530096e-06, |
| "loss": 0.9337, |
| "step": 2447 |
| }, |
| { |
| "epoch": 0.07143586675810146, |
| "grad_norm": 0.3470037281513214, |
| "learning_rate": 1.902674989729904e-06, |
| "loss": 0.8058, |
| "step": 2448 |
| }, |
| { |
| "epoch": 0.0714650480762216, |
| "grad_norm": 0.39751961827278137, |
| "learning_rate": 1.8990569549607919e-06, |
| "loss": 0.6463, |
| "step": 2449 |
| }, |
| { |
| "epoch": 0.07149422939434175, |
| "grad_norm": 0.3989277184009552, |
| "learning_rate": 1.8954415568219697e-06, |
| "loss": 1.0606, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.07152341071246188, |
| "grad_norm": 0.3428085148334503, |
| "learning_rate": 1.8918287983874944e-06, |
| "loss": 1.1706, |
| "step": 2451 |
| }, |
| { |
| "epoch": 0.07155259203058202, |
| "grad_norm": 0.3947373628616333, |
| "learning_rate": 1.8882186827291749e-06, |
| "loss": 1.1514, |
| "step": 2452 |
| }, |
| { |
| "epoch": 0.07158177334870217, |
| "grad_norm": 0.39395517110824585, |
| "learning_rate": 1.8846112129165722e-06, |
| "loss": 0.5415, |
| "step": 2453 |
| }, |
| { |
| "epoch": 0.0716109546668223, |
| "grad_norm": 0.38713130354881287, |
| "learning_rate": 1.8810063920170029e-06, |
| "loss": 0.607, |
| "step": 2454 |
| }, |
| { |
| "epoch": 0.07164013598494244, |
| "grad_norm": 0.3700648546218872, |
| "learning_rate": 1.8774042230955248e-06, |
| "loss": 0.4881, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.07166931730306259, |
| "grad_norm": 0.3555237948894501, |
| "learning_rate": 1.8738047092149458e-06, |
| "loss": 0.5903, |
| "step": 2456 |
| }, |
| { |
| "epoch": 0.07169849862118272, |
| "grad_norm": 0.4225834012031555, |
| "learning_rate": 1.870207853435817e-06, |
| "loss": 0.7812, |
| "step": 2457 |
| }, |
| { |
| "epoch": 0.07172767993930286, |
| "grad_norm": 0.3896639347076416, |
| "learning_rate": 1.8666136588164242e-06, |
| "loss": 0.6603, |
| "step": 2458 |
| }, |
| { |
| "epoch": 0.071756861257423, |
| "grad_norm": 0.3705589473247528, |
| "learning_rate": 1.8630221284127924e-06, |
| "loss": 0.6749, |
| "step": 2459 |
| }, |
| { |
| "epoch": 0.07178604257554314, |
| "grad_norm": 0.4210870862007141, |
| "learning_rate": 1.8594332652786857e-06, |
| "loss": 0.6146, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.07181522389366328, |
| "grad_norm": 0.43848657608032227, |
| "learning_rate": 1.855847072465593e-06, |
| "loss": 0.6099, |
| "step": 2461 |
| }, |
| { |
| "epoch": 0.07184440521178341, |
| "grad_norm": 0.382726788520813, |
| "learning_rate": 1.8522635530227389e-06, |
| "loss": 0.5493, |
| "step": 2462 |
| }, |
| { |
| "epoch": 0.07187358652990355, |
| "grad_norm": 0.3996677100658417, |
| "learning_rate": 1.8486827099970739e-06, |
| "loss": 1.2025, |
| "step": 2463 |
| }, |
| { |
| "epoch": 0.0719027678480237, |
| "grad_norm": 0.3757278621196747, |
| "learning_rate": 1.8451045464332695e-06, |
| "loss": 0.4917, |
| "step": 2464 |
| }, |
| { |
| "epoch": 0.07193194916614383, |
| "grad_norm": 0.3189932107925415, |
| "learning_rate": 1.8415290653737217e-06, |
| "loss": 0.609, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.07196113048426397, |
| "grad_norm": 0.3765241205692291, |
| "learning_rate": 1.837956269858543e-06, |
| "loss": 0.5374, |
| "step": 2466 |
| }, |
| { |
| "epoch": 0.07199031180238412, |
| "grad_norm": 1.9458388090133667, |
| "learning_rate": 1.8343861629255673e-06, |
| "loss": 1.0712, |
| "step": 2467 |
| }, |
| { |
| "epoch": 0.07201949312050425, |
| "grad_norm": 0.3493138551712036, |
| "learning_rate": 1.8308187476103362e-06, |
| "loss": 0.5691, |
| "step": 2468 |
| }, |
| { |
| "epoch": 0.0720486744386244, |
| "grad_norm": 0.3632068336009979, |
| "learning_rate": 1.8272540269461086e-06, |
| "loss": 0.5956, |
| "step": 2469 |
| }, |
| { |
| "epoch": 0.07207785575674454, |
| "grad_norm": 0.3586715757846832, |
| "learning_rate": 1.8236920039638506e-06, |
| "loss": 0.5701, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.07210703707486467, |
| "grad_norm": 0.3733235001564026, |
| "learning_rate": 1.8201326816922322e-06, |
| "loss": 0.6601, |
| "step": 2471 |
| }, |
| { |
| "epoch": 0.07213621839298481, |
| "grad_norm": 0.38980433344841003, |
| "learning_rate": 1.8165760631576273e-06, |
| "loss": 1.4055, |
| "step": 2472 |
| }, |
| { |
| "epoch": 0.07216539971110494, |
| "grad_norm": 0.3927854001522064, |
| "learning_rate": 1.8130221513841144e-06, |
| "loss": 0.5129, |
| "step": 2473 |
| }, |
| { |
| "epoch": 0.07219458102922509, |
| "grad_norm": 0.36988565325737, |
| "learning_rate": 1.8094709493934665e-06, |
| "loss": 1.0852, |
| "step": 2474 |
| }, |
| { |
| "epoch": 0.07222376234734523, |
| "grad_norm": 0.38533470034599304, |
| "learning_rate": 1.8059224602051544e-06, |
| "loss": 1.333, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.07225294366546536, |
| "grad_norm": 0.4252498149871826, |
| "learning_rate": 1.802376686836345e-06, |
| "loss": 0.4924, |
| "step": 2476 |
| }, |
| { |
| "epoch": 0.07228212498358551, |
| "grad_norm": 0.416372686624527, |
| "learning_rate": 1.7988336323018913e-06, |
| "loss": 0.8022, |
| "step": 2477 |
| }, |
| { |
| "epoch": 0.07231130630170565, |
| "grad_norm": 0.40153470635414124, |
| "learning_rate": 1.795293299614334e-06, |
| "loss": 0.6141, |
| "step": 2478 |
| }, |
| { |
| "epoch": 0.07234048761982578, |
| "grad_norm": 0.3097986876964569, |
| "learning_rate": 1.7917556917839064e-06, |
| "loss": 1.5882, |
| "step": 2479 |
| }, |
| { |
| "epoch": 0.07236966893794593, |
| "grad_norm": 0.5378621816635132, |
| "learning_rate": 1.7882208118185168e-06, |
| "loss": 0.8857, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.07239885025606607, |
| "grad_norm": 0.3928397595882416, |
| "learning_rate": 1.7846886627237598e-06, |
| "loss": 0.641, |
| "step": 2481 |
| }, |
| { |
| "epoch": 0.0724280315741862, |
| "grad_norm": 0.40670332312583923, |
| "learning_rate": 1.7811592475029077e-06, |
| "loss": 0.5933, |
| "step": 2482 |
| }, |
| { |
| "epoch": 0.07245721289230635, |
| "grad_norm": 0.40453046560287476, |
| "learning_rate": 1.777632569156904e-06, |
| "loss": 0.6501, |
| "step": 2483 |
| }, |
| { |
| "epoch": 0.07248639421042649, |
| "grad_norm": 0.4500577449798584, |
| "learning_rate": 1.7741086306843675e-06, |
| "loss": 0.5604, |
| "step": 2484 |
| }, |
| { |
| "epoch": 0.07251557552854662, |
| "grad_norm": 0.4023430049419403, |
| "learning_rate": 1.7705874350815905e-06, |
| "loss": 0.7661, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.07254475684666677, |
| "grad_norm": 0.4476965069770813, |
| "learning_rate": 1.7670689853425266e-06, |
| "loss": 0.8507, |
| "step": 2486 |
| }, |
| { |
| "epoch": 0.0725739381647869, |
| "grad_norm": 0.3741615116596222, |
| "learning_rate": 1.7635532844588016e-06, |
| "loss": 1.0581, |
| "step": 2487 |
| }, |
| { |
| "epoch": 0.07260311948290704, |
| "grad_norm": 0.33607274293899536, |
| "learning_rate": 1.7600403354196982e-06, |
| "loss": 0.5593, |
| "step": 2488 |
| }, |
| { |
| "epoch": 0.07263230080102719, |
| "grad_norm": 0.38659921288490295, |
| "learning_rate": 1.7565301412121644e-06, |
| "loss": 0.6821, |
| "step": 2489 |
| }, |
| { |
| "epoch": 0.07266148211914732, |
| "grad_norm": 0.4163966476917267, |
| "learning_rate": 1.753022704820802e-06, |
| "loss": 0.6025, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.07269066343726746, |
| "grad_norm": 0.2747226357460022, |
| "learning_rate": 1.749518029227868e-06, |
| "loss": 0.2491, |
| "step": 2491 |
| }, |
| { |
| "epoch": 0.0727198447553876, |
| "grad_norm": 0.4343883991241455, |
| "learning_rate": 1.7460161174132746e-06, |
| "loss": 0.7117, |
| "step": 2492 |
| }, |
| { |
| "epoch": 0.07274902607350774, |
| "grad_norm": 0.4358595609664917, |
| "learning_rate": 1.7425169723545843e-06, |
| "loss": 0.504, |
| "step": 2493 |
| }, |
| { |
| "epoch": 0.07277820739162788, |
| "grad_norm": 0.35502952337265015, |
| "learning_rate": 1.7390205970270018e-06, |
| "loss": 0.578, |
| "step": 2494 |
| }, |
| { |
| "epoch": 0.07280738870974802, |
| "grad_norm": 0.32719168066978455, |
| "learning_rate": 1.7355269944033849e-06, |
| "loss": 0.6935, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.07283657002786816, |
| "grad_norm": 0.36326903104782104, |
| "learning_rate": 1.7320361674542268e-06, |
| "loss": 0.4608, |
| "step": 2496 |
| }, |
| { |
| "epoch": 0.0728657513459883, |
| "grad_norm": 0.42309632897377014, |
| "learning_rate": 1.7285481191476622e-06, |
| "loss": 1.2679, |
| "step": 2497 |
| }, |
| { |
| "epoch": 0.07289493266410844, |
| "grad_norm": 0.4983547627925873, |
| "learning_rate": 1.7250628524494672e-06, |
| "loss": 1.6493, |
| "step": 2498 |
| }, |
| { |
| "epoch": 0.07292411398222857, |
| "grad_norm": 0.40348270535469055, |
| "learning_rate": 1.721580370323047e-06, |
| "loss": 0.6063, |
| "step": 2499 |
| }, |
| { |
| "epoch": 0.07295329530034872, |
| "grad_norm": 0.3600374162197113, |
| "learning_rate": 1.7181006757294437e-06, |
| "loss": 0.6109, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.07298247661846885, |
| "grad_norm": 0.3590660095214844, |
| "learning_rate": 1.714623771627329e-06, |
| "loss": 0.7182, |
| "step": 2501 |
| }, |
| { |
| "epoch": 0.073011657936589, |
| "grad_norm": 0.40949514508247375, |
| "learning_rate": 1.7111496609729994e-06, |
| "loss": 1.3815, |
| "step": 2502 |
| }, |
| { |
| "epoch": 0.07304083925470914, |
| "grad_norm": 0.37961772084236145, |
| "learning_rate": 1.707678346720375e-06, |
| "loss": 0.8955, |
| "step": 2503 |
| }, |
| { |
| "epoch": 0.07307002057282927, |
| "grad_norm": 0.3748128414154053, |
| "learning_rate": 1.7042098318210044e-06, |
| "loss": 0.6756, |
| "step": 2504 |
| }, |
| { |
| "epoch": 0.07309920189094941, |
| "grad_norm": 0.4599277377128601, |
| "learning_rate": 1.7007441192240486e-06, |
| "loss": 0.8451, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.07312838320906956, |
| "grad_norm": 0.3844378888607025, |
| "learning_rate": 1.6972812118762915e-06, |
| "loss": 0.4846, |
| "step": 2506 |
| }, |
| { |
| "epoch": 0.07315756452718969, |
| "grad_norm": 0.4152567982673645, |
| "learning_rate": 1.693821112722131e-06, |
| "loss": 1.0798, |
| "step": 2507 |
| }, |
| { |
| "epoch": 0.07318674584530983, |
| "grad_norm": 0.37308329343795776, |
| "learning_rate": 1.6903638247035752e-06, |
| "loss": 0.7159, |
| "step": 2508 |
| }, |
| { |
| "epoch": 0.07321592716342998, |
| "grad_norm": 0.389787882566452, |
| "learning_rate": 1.6869093507602419e-06, |
| "loss": 0.5494, |
| "step": 2509 |
| }, |
| { |
| "epoch": 0.07324510848155011, |
| "grad_norm": 0.36313098669052124, |
| "learning_rate": 1.6834576938293562e-06, |
| "loss": 0.5972, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.07327428979967025, |
| "grad_norm": 0.4192260503768921, |
| "learning_rate": 1.68000885684575e-06, |
| "loss": 0.6555, |
| "step": 2511 |
| }, |
| { |
| "epoch": 0.0733034711177904, |
| "grad_norm": 0.3733217120170593, |
| "learning_rate": 1.6765628427418584e-06, |
| "loss": 0.5399, |
| "step": 2512 |
| }, |
| { |
| "epoch": 0.07333265243591053, |
| "grad_norm": 0.3721393942832947, |
| "learning_rate": 1.6731196544477108e-06, |
| "loss": 0.9389, |
| "step": 2513 |
| }, |
| { |
| "epoch": 0.07336183375403067, |
| "grad_norm": 0.40551477670669556, |
| "learning_rate": 1.6696792948909414e-06, |
| "loss": 1.0659, |
| "step": 2514 |
| }, |
| { |
| "epoch": 0.0733910150721508, |
| "grad_norm": 0.51129549741745, |
| "learning_rate": 1.6662417669967724e-06, |
| "loss": 1.0479, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.07342019639027095, |
| "grad_norm": 0.4952710270881653, |
| "learning_rate": 1.6628070736880202e-06, |
| "loss": 1.566, |
| "step": 2516 |
| }, |
| { |
| "epoch": 0.07344937770839109, |
| "grad_norm": 0.3567853271961212, |
| "learning_rate": 1.6593752178850937e-06, |
| "loss": 0.4982, |
| "step": 2517 |
| }, |
| { |
| "epoch": 0.07347855902651122, |
| "grad_norm": 0.35705626010894775, |
| "learning_rate": 1.655946202505989e-06, |
| "loss": 0.5666, |
| "step": 2518 |
| }, |
| { |
| "epoch": 0.07350774034463137, |
| "grad_norm": 0.35106685757637024, |
| "learning_rate": 1.6525200304662824e-06, |
| "loss": 0.6079, |
| "step": 2519 |
| }, |
| { |
| "epoch": 0.07353692166275151, |
| "grad_norm": 0.4099675714969635, |
| "learning_rate": 1.6490967046791389e-06, |
| "loss": 1.0956, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.07356610298087164, |
| "grad_norm": 0.36880987882614136, |
| "learning_rate": 1.6456762280552991e-06, |
| "loss": 0.5858, |
| "step": 2521 |
| }, |
| { |
| "epoch": 0.07359528429899179, |
| "grad_norm": 0.35809388756752014, |
| "learning_rate": 1.6422586035030796e-06, |
| "loss": 0.4894, |
| "step": 2522 |
| }, |
| { |
| "epoch": 0.07362446561711193, |
| "grad_norm": 0.36433690786361694, |
| "learning_rate": 1.6388438339283769e-06, |
| "loss": 1.1668, |
| "step": 2523 |
| }, |
| { |
| "epoch": 0.07365364693523206, |
| "grad_norm": 0.3582357168197632, |
| "learning_rate": 1.6354319222346588e-06, |
| "loss": 0.3903, |
| "step": 2524 |
| }, |
| { |
| "epoch": 0.0736828282533522, |
| "grad_norm": 0.33514243364334106, |
| "learning_rate": 1.6320228713229598e-06, |
| "loss": 1.1718, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.07371200957147234, |
| "grad_norm": 0.3750450015068054, |
| "learning_rate": 1.6286166840918865e-06, |
| "loss": 0.6427, |
| "step": 2526 |
| }, |
| { |
| "epoch": 0.07374119088959248, |
| "grad_norm": 0.3696582615375519, |
| "learning_rate": 1.625213363437607e-06, |
| "loss": 0.6735, |
| "step": 2527 |
| }, |
| { |
| "epoch": 0.07377037220771263, |
| "grad_norm": 0.5702472925186157, |
| "learning_rate": 1.621812912253854e-06, |
| "loss": 1.0402, |
| "step": 2528 |
| }, |
| { |
| "epoch": 0.07379955352583276, |
| "grad_norm": 0.35556790232658386, |
| "learning_rate": 1.618415333431917e-06, |
| "loss": 0.5579, |
| "step": 2529 |
| }, |
| { |
| "epoch": 0.0738287348439529, |
| "grad_norm": 0.3788866102695465, |
| "learning_rate": 1.6150206298606485e-06, |
| "loss": 0.4429, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.07385791616207305, |
| "grad_norm": 0.378307580947876, |
| "learning_rate": 1.6116288044264549e-06, |
| "loss": 0.6217, |
| "step": 2531 |
| }, |
| { |
| "epoch": 0.07388709748019318, |
| "grad_norm": 0.4378463327884674, |
| "learning_rate": 1.6082398600132909e-06, |
| "loss": 0.6754, |
| "step": 2532 |
| }, |
| { |
| "epoch": 0.07391627879831332, |
| "grad_norm": 0.4434072971343994, |
| "learning_rate": 1.6048537995026686e-06, |
| "loss": 1.1546, |
| "step": 2533 |
| }, |
| { |
| "epoch": 0.07394546011643346, |
| "grad_norm": 0.37783846259117126, |
| "learning_rate": 1.601470625773643e-06, |
| "loss": 1.1054, |
| "step": 2534 |
| }, |
| { |
| "epoch": 0.0739746414345536, |
| "grad_norm": 0.3737855851650238, |
| "learning_rate": 1.598090341702813e-06, |
| "loss": 0.416, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.07400382275267374, |
| "grad_norm": 0.40310072898864746, |
| "learning_rate": 1.594712950164326e-06, |
| "loss": 0.7229, |
| "step": 2536 |
| }, |
| { |
| "epoch": 0.07403300407079388, |
| "grad_norm": 0.41221755743026733, |
| "learning_rate": 1.5913384540298694e-06, |
| "loss": 0.4391, |
| "step": 2537 |
| }, |
| { |
| "epoch": 0.07406218538891401, |
| "grad_norm": 0.37780556082725525, |
| "learning_rate": 1.5879668561686624e-06, |
| "loss": 0.5232, |
| "step": 2538 |
| }, |
| { |
| "epoch": 0.07409136670703416, |
| "grad_norm": 0.3341659605503082, |
| "learning_rate": 1.584598159447468e-06, |
| "loss": 0.3965, |
| "step": 2539 |
| }, |
| { |
| "epoch": 0.07412054802515429, |
| "grad_norm": 0.49289295077323914, |
| "learning_rate": 1.5812323667305774e-06, |
| "loss": 0.7953, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.07414972934327443, |
| "grad_norm": 0.3711307644844055, |
| "learning_rate": 1.5778694808798112e-06, |
| "loss": 1.1165, |
| "step": 2541 |
| }, |
| { |
| "epoch": 0.07417891066139458, |
| "grad_norm": 0.38232657313346863, |
| "learning_rate": 1.574509504754524e-06, |
| "loss": 0.4319, |
| "step": 2542 |
| }, |
| { |
| "epoch": 0.07420809197951471, |
| "grad_norm": 0.38187873363494873, |
| "learning_rate": 1.5711524412115942e-06, |
| "loss": 0.6933, |
| "step": 2543 |
| }, |
| { |
| "epoch": 0.07423727329763485, |
| "grad_norm": 0.3133980631828308, |
| "learning_rate": 1.5677982931054197e-06, |
| "loss": 1.0265, |
| "step": 2544 |
| }, |
| { |
| "epoch": 0.074266454615755, |
| "grad_norm": 0.3908887803554535, |
| "learning_rate": 1.5644470632879266e-06, |
| "loss": 0.5822, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.07429563593387513, |
| "grad_norm": 0.39390799403190613, |
| "learning_rate": 1.5610987546085537e-06, |
| "loss": 0.5805, |
| "step": 2546 |
| }, |
| { |
| "epoch": 0.07432481725199527, |
| "grad_norm": 0.326328307390213, |
| "learning_rate": 1.5577533699142565e-06, |
| "loss": 1.0381, |
| "step": 2547 |
| }, |
| { |
| "epoch": 0.07435399857011542, |
| "grad_norm": 0.4342818856239319, |
| "learning_rate": 1.5544109120495087e-06, |
| "loss": 1.4437, |
| "step": 2548 |
| }, |
| { |
| "epoch": 0.07438317988823555, |
| "grad_norm": 0.35852742195129395, |
| "learning_rate": 1.551071383856293e-06, |
| "loss": 0.5159, |
| "step": 2549 |
| }, |
| { |
| "epoch": 0.07441236120635569, |
| "grad_norm": 0.29673218727111816, |
| "learning_rate": 1.5477347881740996e-06, |
| "loss": 0.5198, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.07444154252447584, |
| "grad_norm": 0.3413328528404236, |
| "learning_rate": 1.5444011278399252e-06, |
| "loss": 0.9861, |
| "step": 2551 |
| }, |
| { |
| "epoch": 0.07447072384259597, |
| "grad_norm": 0.3477269113063812, |
| "learning_rate": 1.5410704056882753e-06, |
| "loss": 0.5804, |
| "step": 2552 |
| }, |
| { |
| "epoch": 0.07449990516071611, |
| "grad_norm": 0.4687328338623047, |
| "learning_rate": 1.5377426245511512e-06, |
| "loss": 0.6313, |
| "step": 2553 |
| }, |
| { |
| "epoch": 0.07452908647883624, |
| "grad_norm": 0.38218438625335693, |
| "learning_rate": 1.5344177872580546e-06, |
| "loss": 0.5239, |
| "step": 2554 |
| }, |
| { |
| "epoch": 0.07455826779695639, |
| "grad_norm": 0.40153077244758606, |
| "learning_rate": 1.5310958966359906e-06, |
| "loss": 0.5693, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.07458744911507653, |
| "grad_norm": 0.38287755846977234, |
| "learning_rate": 1.5277769555094517e-06, |
| "loss": 1.1242, |
| "step": 2556 |
| }, |
| { |
| "epoch": 0.07461663043319666, |
| "grad_norm": 0.374569296836853, |
| "learning_rate": 1.5244609667004234e-06, |
| "loss": 0.5063, |
| "step": 2557 |
| }, |
| { |
| "epoch": 0.0746458117513168, |
| "grad_norm": 0.4813336730003357, |
| "learning_rate": 1.5211479330283846e-06, |
| "loss": 0.6274, |
| "step": 2558 |
| }, |
| { |
| "epoch": 0.07467499306943695, |
| "grad_norm": 0.33612048625946045, |
| "learning_rate": 1.5178378573102986e-06, |
| "loss": 0.5307, |
| "step": 2559 |
| }, |
| { |
| "epoch": 0.07470417438755708, |
| "grad_norm": 0.45173701643943787, |
| "learning_rate": 1.514530742360612e-06, |
| "loss": 0.5431, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.07473335570567723, |
| "grad_norm": 0.39904889464378357, |
| "learning_rate": 1.5112265909912589e-06, |
| "loss": 1.7979, |
| "step": 2561 |
| }, |
| { |
| "epoch": 0.07476253702379737, |
| "grad_norm": 0.3838162422180176, |
| "learning_rate": 1.507925406011651e-06, |
| "loss": 1.5218, |
| "step": 2562 |
| }, |
| { |
| "epoch": 0.0747917183419175, |
| "grad_norm": 0.4197467565536499, |
| "learning_rate": 1.5046271902286764e-06, |
| "loss": 0.6602, |
| "step": 2563 |
| }, |
| { |
| "epoch": 0.07482089966003765, |
| "grad_norm": 0.33908793330192566, |
| "learning_rate": 1.5013319464467019e-06, |
| "loss": 0.4107, |
| "step": 2564 |
| }, |
| { |
| "epoch": 0.07485008097815779, |
| "grad_norm": 0.49844592809677124, |
| "learning_rate": 1.4980396774675642e-06, |
| "loss": 1.0999, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.07487926229627792, |
| "grad_norm": 0.3603459298610687, |
| "learning_rate": 1.49475038609057e-06, |
| "loss": 0.5423, |
| "step": 2566 |
| }, |
| { |
| "epoch": 0.07490844361439807, |
| "grad_norm": 0.40021106600761414, |
| "learning_rate": 1.4914640751124977e-06, |
| "loss": 1.1444, |
| "step": 2567 |
| }, |
| { |
| "epoch": 0.0749376249325182, |
| "grad_norm": 0.5920940637588501, |
| "learning_rate": 1.4881807473275916e-06, |
| "loss": 1.5685, |
| "step": 2568 |
| }, |
| { |
| "epoch": 0.07496680625063834, |
| "grad_norm": 0.35790106654167175, |
| "learning_rate": 1.484900405527554e-06, |
| "loss": 1.0304, |
| "step": 2569 |
| }, |
| { |
| "epoch": 0.07499598756875848, |
| "grad_norm": 0.35599228739738464, |
| "learning_rate": 1.4816230525015563e-06, |
| "loss": 1.0517, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.07502516888687862, |
| "grad_norm": 0.3407399356365204, |
| "learning_rate": 1.4783486910362217e-06, |
| "loss": 1.0506, |
| "step": 2571 |
| }, |
| { |
| "epoch": 0.07505435020499876, |
| "grad_norm": 0.3198636770248413, |
| "learning_rate": 1.4750773239156313e-06, |
| "loss": 1.061, |
| "step": 2572 |
| }, |
| { |
| "epoch": 0.0750835315231189, |
| "grad_norm": 0.4800258278846741, |
| "learning_rate": 1.4718089539213237e-06, |
| "loss": 0.8734, |
| "step": 2573 |
| }, |
| { |
| "epoch": 0.07511271284123903, |
| "grad_norm": 0.42713841795921326, |
| "learning_rate": 1.4685435838322876e-06, |
| "loss": 1.4062, |
| "step": 2574 |
| }, |
| { |
| "epoch": 0.07514189415935918, |
| "grad_norm": 0.38790127635002136, |
| "learning_rate": 1.4652812164249586e-06, |
| "loss": 0.9377, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.07517107547747932, |
| "grad_norm": 0.3390432894229889, |
| "learning_rate": 1.4620218544732195e-06, |
| "loss": 0.49, |
| "step": 2576 |
| }, |
| { |
| "epoch": 0.07520025679559945, |
| "grad_norm": 0.3834192156791687, |
| "learning_rate": 1.458765500748402e-06, |
| "loss": 0.7178, |
| "step": 2577 |
| }, |
| { |
| "epoch": 0.0752294381137196, |
| "grad_norm": 0.3675811290740967, |
| "learning_rate": 1.4555121580192744e-06, |
| "loss": 0.5448, |
| "step": 2578 |
| }, |
| { |
| "epoch": 0.07525861943183973, |
| "grad_norm": 0.5766485333442688, |
| "learning_rate": 1.4522618290520485e-06, |
| "loss": 0.8365, |
| "step": 2579 |
| }, |
| { |
| "epoch": 0.07528780074995987, |
| "grad_norm": 0.33565324544906616, |
| "learning_rate": 1.4490145166103742e-06, |
| "loss": 0.4851, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.07531698206808002, |
| "grad_norm": 0.45304179191589355, |
| "learning_rate": 1.4457702234553334e-06, |
| "loss": 0.8912, |
| "step": 2581 |
| }, |
| { |
| "epoch": 0.07534616338620015, |
| "grad_norm": 0.44394874572753906, |
| "learning_rate": 1.4425289523454412e-06, |
| "loss": 1.5025, |
| "step": 2582 |
| }, |
| { |
| "epoch": 0.0753753447043203, |
| "grad_norm": 0.38666340708732605, |
| "learning_rate": 1.4392907060366461e-06, |
| "loss": 1.7349, |
| "step": 2583 |
| }, |
| { |
| "epoch": 0.07540452602244044, |
| "grad_norm": 0.36668702960014343, |
| "learning_rate": 1.4360554872823223e-06, |
| "loss": 0.4822, |
| "step": 2584 |
| }, |
| { |
| "epoch": 0.07543370734056057, |
| "grad_norm": 0.39088961482048035, |
| "learning_rate": 1.4328232988332692e-06, |
| "loss": 0.6108, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.07546288865868071, |
| "grad_norm": 0.4104520380496979, |
| "learning_rate": 1.429594143437711e-06, |
| "loss": 0.6043, |
| "step": 2586 |
| }, |
| { |
| "epoch": 0.07549206997680086, |
| "grad_norm": 0.47211554646492004, |
| "learning_rate": 1.4263680238412947e-06, |
| "loss": 0.4607, |
| "step": 2587 |
| }, |
| { |
| "epoch": 0.07552125129492099, |
| "grad_norm": 0.4273495674133301, |
| "learning_rate": 1.423144942787082e-06, |
| "loss": 0.9207, |
| "step": 2588 |
| }, |
| { |
| "epoch": 0.07555043261304113, |
| "grad_norm": 0.3817402720451355, |
| "learning_rate": 1.4199249030155549e-06, |
| "loss": 0.5179, |
| "step": 2589 |
| }, |
| { |
| "epoch": 0.07557961393116128, |
| "grad_norm": 0.49887150526046753, |
| "learning_rate": 1.4167079072646073e-06, |
| "loss": 0.7416, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.07560879524928141, |
| "grad_norm": 0.4196615219116211, |
| "learning_rate": 1.4134939582695428e-06, |
| "loss": 1.3193, |
| "step": 2591 |
| }, |
| { |
| "epoch": 0.07563797656740155, |
| "grad_norm": 0.4267847537994385, |
| "learning_rate": 1.4102830587630795e-06, |
| "loss": 0.9059, |
| "step": 2592 |
| }, |
| { |
| "epoch": 0.07566715788552168, |
| "grad_norm": 0.3905622959136963, |
| "learning_rate": 1.4070752114753405e-06, |
| "loss": 0.6422, |
| "step": 2593 |
| }, |
| { |
| "epoch": 0.07569633920364183, |
| "grad_norm": 0.4203459918498993, |
| "learning_rate": 1.4038704191338526e-06, |
| "loss": 0.5902, |
| "step": 2594 |
| }, |
| { |
| "epoch": 0.07572552052176197, |
| "grad_norm": 0.3489826023578644, |
| "learning_rate": 1.4006686844635435e-06, |
| "loss": 0.5587, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.0757547018398821, |
| "grad_norm": 0.3784318268299103, |
| "learning_rate": 1.3974700101867473e-06, |
| "loss": 0.5615, |
| "step": 2596 |
| }, |
| { |
| "epoch": 0.07578388315800225, |
| "grad_norm": 0.3758774697780609, |
| "learning_rate": 1.3942743990231883e-06, |
| "loss": 0.9165, |
| "step": 2597 |
| }, |
| { |
| "epoch": 0.07581306447612239, |
| "grad_norm": 0.3940548002719879, |
| "learning_rate": 1.3910818536899918e-06, |
| "loss": 0.7935, |
| "step": 2598 |
| }, |
| { |
| "epoch": 0.07584224579424252, |
| "grad_norm": 0.3558518588542938, |
| "learning_rate": 1.387892376901675e-06, |
| "loss": 0.737, |
| "step": 2599 |
| }, |
| { |
| "epoch": 0.07587142711236267, |
| "grad_norm": 0.5340282320976257, |
| "learning_rate": 1.3847059713701456e-06, |
| "loss": 0.628, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.07590060843048281, |
| "grad_norm": 0.3865623474121094, |
| "learning_rate": 1.381522639804697e-06, |
| "loss": 0.6232, |
| "step": 2601 |
| }, |
| { |
| "epoch": 0.07592978974860294, |
| "grad_norm": 0.3389996886253357, |
| "learning_rate": 1.3783423849120158e-06, |
| "loss": 1.0672, |
| "step": 2602 |
| }, |
| { |
| "epoch": 0.07595897106672309, |
| "grad_norm": 0.394597589969635, |
| "learning_rate": 1.3751652093961648e-06, |
| "loss": 0.7039, |
| "step": 2603 |
| }, |
| { |
| "epoch": 0.07598815238484323, |
| "grad_norm": 0.35606124997138977, |
| "learning_rate": 1.3719911159585942e-06, |
| "loss": 0.536, |
| "step": 2604 |
| }, |
| { |
| "epoch": 0.07601733370296336, |
| "grad_norm": 0.3423217236995697, |
| "learning_rate": 1.3688201072981333e-06, |
| "loss": 0.4966, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.0760465150210835, |
| "grad_norm": 0.39531591534614563, |
| "learning_rate": 1.3656521861109845e-06, |
| "loss": 0.6786, |
| "step": 2606 |
| }, |
| { |
| "epoch": 0.07607569633920364, |
| "grad_norm": 0.36891868710517883, |
| "learning_rate": 1.362487355090727e-06, |
| "loss": 1.1483, |
| "step": 2607 |
| }, |
| { |
| "epoch": 0.07610487765732378, |
| "grad_norm": 0.34951213002204895, |
| "learning_rate": 1.3593256169283153e-06, |
| "loss": 0.682, |
| "step": 2608 |
| }, |
| { |
| "epoch": 0.07613405897544392, |
| "grad_norm": 0.4387266933917999, |
| "learning_rate": 1.3561669743120686e-06, |
| "loss": 0.6061, |
| "step": 2609 |
| }, |
| { |
| "epoch": 0.07616324029356406, |
| "grad_norm": 0.3602062165737152, |
| "learning_rate": 1.3530114299276797e-06, |
| "loss": 0.4626, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.0761924216116842, |
| "grad_norm": 0.4296179711818695, |
| "learning_rate": 1.349858986458205e-06, |
| "loss": 0.7654, |
| "step": 2611 |
| }, |
| { |
| "epoch": 0.07622160292980434, |
| "grad_norm": 0.3976057767868042, |
| "learning_rate": 1.3467096465840635e-06, |
| "loss": 1.0187, |
| "step": 2612 |
| }, |
| { |
| "epoch": 0.07625078424792447, |
| "grad_norm": 0.3597880005836487, |
| "learning_rate": 1.3435634129830349e-06, |
| "loss": 0.6492, |
| "step": 2613 |
| }, |
| { |
| "epoch": 0.07627996556604462, |
| "grad_norm": 0.3498636782169342, |
| "learning_rate": 1.3404202883302575e-06, |
| "loss": 0.3831, |
| "step": 2614 |
| }, |
| { |
| "epoch": 0.07630914688416476, |
| "grad_norm": 0.3697919547557831, |
| "learning_rate": 1.3372802752982295e-06, |
| "loss": 0.7359, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.0763383282022849, |
| "grad_norm": 0.38037577271461487, |
| "learning_rate": 1.334143376556799e-06, |
| "loss": 0.5157, |
| "step": 2616 |
| }, |
| { |
| "epoch": 0.07636750952040504, |
| "grad_norm": 0.3552990257740021, |
| "learning_rate": 1.3310095947731695e-06, |
| "loss": 0.5277, |
| "step": 2617 |
| }, |
| { |
| "epoch": 0.07639669083852518, |
| "grad_norm": 0.3449668288230896, |
| "learning_rate": 1.3278789326118945e-06, |
| "loss": 0.4385, |
| "step": 2618 |
| }, |
| { |
| "epoch": 0.07642587215664531, |
| "grad_norm": 0.5058618783950806, |
| "learning_rate": 1.3247513927348715e-06, |
| "loss": 0.5839, |
| "step": 2619 |
| }, |
| { |
| "epoch": 0.07645505347476546, |
| "grad_norm": 0.4882209300994873, |
| "learning_rate": 1.3216269778013447e-06, |
| "loss": 0.5863, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.07648423479288559, |
| "grad_norm": 0.4103037416934967, |
| "learning_rate": 1.3185056904679038e-06, |
| "loss": 0.6676, |
| "step": 2621 |
| }, |
| { |
| "epoch": 0.07651341611100573, |
| "grad_norm": 0.44188377261161804, |
| "learning_rate": 1.3153875333884742e-06, |
| "loss": 1.0711, |
| "step": 2622 |
| }, |
| { |
| "epoch": 0.07654259742912588, |
| "grad_norm": 0.363661527633667, |
| "learning_rate": 1.3122725092143252e-06, |
| "loss": 0.4485, |
| "step": 2623 |
| }, |
| { |
| "epoch": 0.07657177874724601, |
| "grad_norm": 0.4409139156341553, |
| "learning_rate": 1.3091606205940604e-06, |
| "loss": 0.7827, |
| "step": 2624 |
| }, |
| { |
| "epoch": 0.07660096006536615, |
| "grad_norm": 0.3666093349456787, |
| "learning_rate": 1.3060518701736153e-06, |
| "loss": 0.5889, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.0766301413834863, |
| "grad_norm": 0.3922710716724396, |
| "learning_rate": 1.3029462605962578e-06, |
| "loss": 0.4999, |
| "step": 2626 |
| }, |
| { |
| "epoch": 0.07665932270160643, |
| "grad_norm": 0.396491676568985, |
| "learning_rate": 1.2998437945025883e-06, |
| "loss": 1.0237, |
| "step": 2627 |
| }, |
| { |
| "epoch": 0.07668850401972657, |
| "grad_norm": 0.36983323097229004, |
| "learning_rate": 1.2967444745305291e-06, |
| "loss": 1.0797, |
| "step": 2628 |
| }, |
| { |
| "epoch": 0.07671768533784672, |
| "grad_norm": 0.4503006935119629, |
| "learning_rate": 1.293648303315332e-06, |
| "loss": 0.3405, |
| "step": 2629 |
| }, |
| { |
| "epoch": 0.07674686665596685, |
| "grad_norm": 0.4023594856262207, |
| "learning_rate": 1.2905552834895718e-06, |
| "loss": 1.042, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.07677604797408699, |
| "grad_norm": 0.3823414742946625, |
| "learning_rate": 1.2874654176831403e-06, |
| "loss": 0.7493, |
| "step": 2631 |
| }, |
| { |
| "epoch": 0.07680522929220712, |
| "grad_norm": 0.33881792426109314, |
| "learning_rate": 1.2843787085232474e-06, |
| "loss": 0.6991, |
| "step": 2632 |
| }, |
| { |
| "epoch": 0.07683441061032727, |
| "grad_norm": 0.3914770483970642, |
| "learning_rate": 1.2812951586344236e-06, |
| "loss": 0.6684, |
| "step": 2633 |
| }, |
| { |
| "epoch": 0.07686359192844741, |
| "grad_norm": 0.3733922243118286, |
| "learning_rate": 1.278214770638508e-06, |
| "loss": 0.4265, |
| "step": 2634 |
| }, |
| { |
| "epoch": 0.07689277324656754, |
| "grad_norm": 0.34755009412765503, |
| "learning_rate": 1.2751375471546568e-06, |
| "loss": 1.1077, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.07692195456468769, |
| "grad_norm": 0.37347322702407837, |
| "learning_rate": 1.2720634907993278e-06, |
| "loss": 0.7075, |
| "step": 2636 |
| }, |
| { |
| "epoch": 0.07695113588280783, |
| "grad_norm": 0.39913591742515564, |
| "learning_rate": 1.2689926041862955e-06, |
| "loss": 0.6055, |
| "step": 2637 |
| }, |
| { |
| "epoch": 0.07698031720092796, |
| "grad_norm": 0.3538033366203308, |
| "learning_rate": 1.2659248899266314e-06, |
| "loss": 0.3989, |
| "step": 2638 |
| }, |
| { |
| "epoch": 0.0770094985190481, |
| "grad_norm": 0.35297930240631104, |
| "learning_rate": 1.2628603506287108e-06, |
| "loss": 0.3746, |
| "step": 2639 |
| }, |
| { |
| "epoch": 0.07703867983716825, |
| "grad_norm": 0.3447231948375702, |
| "learning_rate": 1.2597989888982131e-06, |
| "loss": 0.4505, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.07706786115528838, |
| "grad_norm": 0.41684210300445557, |
| "learning_rate": 1.256740807338116e-06, |
| "loss": 0.4523, |
| "step": 2641 |
| }, |
| { |
| "epoch": 0.07709704247340853, |
| "grad_norm": 0.36235302686691284, |
| "learning_rate": 1.2536858085486863e-06, |
| "loss": 0.6463, |
| "step": 2642 |
| }, |
| { |
| "epoch": 0.07712622379152867, |
| "grad_norm": 0.44401517510414124, |
| "learning_rate": 1.2506339951274942e-06, |
| "loss": 1.0798, |
| "step": 2643 |
| }, |
| { |
| "epoch": 0.0771554051096488, |
| "grad_norm": 0.37657594680786133, |
| "learning_rate": 1.247585369669394e-06, |
| "loss": 0.7267, |
| "step": 2644 |
| }, |
| { |
| "epoch": 0.07718458642776894, |
| "grad_norm": 0.36211517453193665, |
| "learning_rate": 1.2445399347665315e-06, |
| "loss": 0.5756, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.07721376774588908, |
| "grad_norm": 0.3776366114616394, |
| "learning_rate": 1.2414976930083423e-06, |
| "loss": 1.1199, |
| "step": 2646 |
| }, |
| { |
| "epoch": 0.07724294906400922, |
| "grad_norm": 0.4251892864704132, |
| "learning_rate": 1.238458646981543e-06, |
| "loss": 1.3051, |
| "step": 2647 |
| }, |
| { |
| "epoch": 0.07727213038212936, |
| "grad_norm": 0.518762469291687, |
| "learning_rate": 1.2354227992701352e-06, |
| "loss": 0.6577, |
| "step": 2648 |
| }, |
| { |
| "epoch": 0.0773013117002495, |
| "grad_norm": 0.5408696532249451, |
| "learning_rate": 1.2323901524554027e-06, |
| "loss": 0.5946, |
| "step": 2649 |
| }, |
| { |
| "epoch": 0.07733049301836964, |
| "grad_norm": 0.379725843667984, |
| "learning_rate": 1.2293607091159043e-06, |
| "loss": 0.7646, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.07735967433648978, |
| "grad_norm": 0.3809633255004883, |
| "learning_rate": 1.2263344718274756e-06, |
| "loss": 0.5448, |
| "step": 2651 |
| }, |
| { |
| "epoch": 0.07738885565460991, |
| "grad_norm": 0.4189484417438507, |
| "learning_rate": 1.2233114431632287e-06, |
| "loss": 0.5962, |
| "step": 2652 |
| }, |
| { |
| "epoch": 0.07741803697273006, |
| "grad_norm": 0.3845962882041931, |
| "learning_rate": 1.2202916256935442e-06, |
| "loss": 0.499, |
| "step": 2653 |
| }, |
| { |
| "epoch": 0.0774472182908502, |
| "grad_norm": 0.3461228609085083, |
| "learning_rate": 1.2172750219860746e-06, |
| "loss": 0.9558, |
| "step": 2654 |
| }, |
| { |
| "epoch": 0.07747639960897033, |
| "grad_norm": 0.3950653672218323, |
| "learning_rate": 1.2142616346057407e-06, |
| "loss": 1.0451, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.07750558092709048, |
| "grad_norm": 0.41117316484451294, |
| "learning_rate": 1.2112514661147262e-06, |
| "loss": 0.5097, |
| "step": 2656 |
| }, |
| { |
| "epoch": 0.07753476224521062, |
| "grad_norm": 0.3654671311378479, |
| "learning_rate": 1.2082445190724778e-06, |
| "loss": 0.5324, |
| "step": 2657 |
| }, |
| { |
| "epoch": 0.07756394356333075, |
| "grad_norm": 0.37353911995887756, |
| "learning_rate": 1.2052407960357038e-06, |
| "loss": 0.5586, |
| "step": 2658 |
| }, |
| { |
| "epoch": 0.0775931248814509, |
| "grad_norm": 0.3693462312221527, |
| "learning_rate": 1.202240299558372e-06, |
| "loss": 0.4585, |
| "step": 2659 |
| }, |
| { |
| "epoch": 0.07762230619957103, |
| "grad_norm": 0.4032810926437378, |
| "learning_rate": 1.199243032191708e-06, |
| "loss": 0.4809, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.07765148751769117, |
| "grad_norm": 0.3969374895095825, |
| "learning_rate": 1.196248996484188e-06, |
| "loss": 0.6517, |
| "step": 2661 |
| }, |
| { |
| "epoch": 0.07768066883581132, |
| "grad_norm": 0.5283824801445007, |
| "learning_rate": 1.1932581949815435e-06, |
| "loss": 1.2015, |
| "step": 2662 |
| }, |
| { |
| "epoch": 0.07770985015393145, |
| "grad_norm": 0.34324243664741516, |
| "learning_rate": 1.1902706302267552e-06, |
| "loss": 1.1021, |
| "step": 2663 |
| }, |
| { |
| "epoch": 0.07773903147205159, |
| "grad_norm": 0.38527441024780273, |
| "learning_rate": 1.1872863047600498e-06, |
| "loss": 0.4788, |
| "step": 2664 |
| }, |
| { |
| "epoch": 0.07776821279017174, |
| "grad_norm": 0.32983332872390747, |
| "learning_rate": 1.1843052211189027e-06, |
| "loss": 0.483, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.07779739410829187, |
| "grad_norm": 0.7628883123397827, |
| "learning_rate": 1.1813273818380327e-06, |
| "loss": 1.749, |
| "step": 2666 |
| }, |
| { |
| "epoch": 0.07782657542641201, |
| "grad_norm": 0.31569838523864746, |
| "learning_rate": 1.178352789449397e-06, |
| "loss": 0.5227, |
| "step": 2667 |
| }, |
| { |
| "epoch": 0.07785575674453216, |
| "grad_norm": 0.3513583838939667, |
| "learning_rate": 1.1753814464821962e-06, |
| "loss": 0.9938, |
| "step": 2668 |
| }, |
| { |
| "epoch": 0.07788493806265229, |
| "grad_norm": 0.355286568403244, |
| "learning_rate": 1.1724133554628648e-06, |
| "loss": 0.5522, |
| "step": 2669 |
| }, |
| { |
| "epoch": 0.07791411938077243, |
| "grad_norm": 0.3142109513282776, |
| "learning_rate": 1.1694485189150717e-06, |
| "loss": 0.8859, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.07794330069889258, |
| "grad_norm": 0.429188072681427, |
| "learning_rate": 1.1664869393597238e-06, |
| "loss": 1.0678, |
| "step": 2671 |
| }, |
| { |
| "epoch": 0.0779724820170127, |
| "grad_norm": 0.36941587924957275, |
| "learning_rate": 1.1635286193149515e-06, |
| "loss": 0.4786, |
| "step": 2672 |
| }, |
| { |
| "epoch": 0.07800166333513285, |
| "grad_norm": 0.5776618123054504, |
| "learning_rate": 1.1605735612961194e-06, |
| "loss": 0.6085, |
| "step": 2673 |
| }, |
| { |
| "epoch": 0.07803084465325298, |
| "grad_norm": 0.3690410256385803, |
| "learning_rate": 1.157621767815818e-06, |
| "loss": 0.6029, |
| "step": 2674 |
| }, |
| { |
| "epoch": 0.07806002597137313, |
| "grad_norm": 0.37634676694869995, |
| "learning_rate": 1.1546732413838586e-06, |
| "loss": 0.5333, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.07808920728949327, |
| "grad_norm": 0.5132603645324707, |
| "learning_rate": 1.151727984507277e-06, |
| "loss": 1.4964, |
| "step": 2676 |
| }, |
| { |
| "epoch": 0.0781183886076134, |
| "grad_norm": 0.4166743755340576, |
| "learning_rate": 1.1487859996903272e-06, |
| "loss": 0.6977, |
| "step": 2677 |
| }, |
| { |
| "epoch": 0.07814756992573355, |
| "grad_norm": 0.38110029697418213, |
| "learning_rate": 1.145847289434484e-06, |
| "loss": 0.6621, |
| "step": 2678 |
| }, |
| { |
| "epoch": 0.07817675124385369, |
| "grad_norm": 0.6142292618751526, |
| "learning_rate": 1.1429118562384378e-06, |
| "loss": 1.0403, |
| "step": 2679 |
| }, |
| { |
| "epoch": 0.07820593256197382, |
| "grad_norm": 0.3477693200111389, |
| "learning_rate": 1.1399797025980886e-06, |
| "loss": 0.5752, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.07823511388009396, |
| "grad_norm": 0.7034415602684021, |
| "learning_rate": 1.1370508310065532e-06, |
| "loss": 0.9527, |
| "step": 2681 |
| }, |
| { |
| "epoch": 0.07826429519821411, |
| "grad_norm": 0.36272910237312317, |
| "learning_rate": 1.1341252439541539e-06, |
| "loss": 0.5774, |
| "step": 2682 |
| }, |
| { |
| "epoch": 0.07829347651633424, |
| "grad_norm": 0.36508727073669434, |
| "learning_rate": 1.13120294392842e-06, |
| "loss": 0.8186, |
| "step": 2683 |
| }, |
| { |
| "epoch": 0.07832265783445438, |
| "grad_norm": 0.3363402485847473, |
| "learning_rate": 1.128283933414089e-06, |
| "loss": 0.8677, |
| "step": 2684 |
| }, |
| { |
| "epoch": 0.07835183915257452, |
| "grad_norm": 0.39723479747772217, |
| "learning_rate": 1.1253682148931011e-06, |
| "loss": 0.5279, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.07838102047069466, |
| "grad_norm": 0.3228338658809662, |
| "learning_rate": 1.1224557908445937e-06, |
| "loss": 0.5327, |
| "step": 2686 |
| }, |
| { |
| "epoch": 0.0784102017888148, |
| "grad_norm": 0.4597456157207489, |
| "learning_rate": 1.1195466637449087e-06, |
| "loss": 0.7677, |
| "step": 2687 |
| }, |
| { |
| "epoch": 0.07843938310693493, |
| "grad_norm": 0.4246422350406647, |
| "learning_rate": 1.1166408360675796e-06, |
| "loss": 0.8394, |
| "step": 2688 |
| }, |
| { |
| "epoch": 0.07846856442505508, |
| "grad_norm": 0.37677639722824097, |
| "learning_rate": 1.113738310283335e-06, |
| "loss": 1.2058, |
| "step": 2689 |
| }, |
| { |
| "epoch": 0.07849774574317522, |
| "grad_norm": 0.39904212951660156, |
| "learning_rate": 1.110839088860099e-06, |
| "loss": 1.2364, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.07852692706129535, |
| "grad_norm": 0.3325336277484894, |
| "learning_rate": 1.1079431742629865e-06, |
| "loss": 1.6689, |
| "step": 2691 |
| }, |
| { |
| "epoch": 0.0785561083794155, |
| "grad_norm": 0.37825027108192444, |
| "learning_rate": 1.1050505689542955e-06, |
| "loss": 0.579, |
| "step": 2692 |
| }, |
| { |
| "epoch": 0.07858528969753564, |
| "grad_norm": 0.33693385124206543, |
| "learning_rate": 1.1021612753935167e-06, |
| "loss": 1.0076, |
| "step": 2693 |
| }, |
| { |
| "epoch": 0.07861447101565577, |
| "grad_norm": 0.3953251540660858, |
| "learning_rate": 1.0992752960373204e-06, |
| "loss": 0.7272, |
| "step": 2694 |
| }, |
| { |
| "epoch": 0.07864365233377592, |
| "grad_norm": 0.3894476890563965, |
| "learning_rate": 1.096392633339558e-06, |
| "loss": 0.8238, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.07867283365189606, |
| "grad_norm": 0.4731530249118805, |
| "learning_rate": 1.0935132897512657e-06, |
| "loss": 1.2081, |
| "step": 2696 |
| }, |
| { |
| "epoch": 0.0787020149700162, |
| "grad_norm": 0.579258143901825, |
| "learning_rate": 1.0906372677206562e-06, |
| "loss": 0.6377, |
| "step": 2697 |
| }, |
| { |
| "epoch": 0.07873119628813634, |
| "grad_norm": 0.370090514421463, |
| "learning_rate": 1.0877645696931149e-06, |
| "loss": 0.6377, |
| "step": 2698 |
| }, |
| { |
| "epoch": 0.07876037760625647, |
| "grad_norm": 0.34545591473579407, |
| "learning_rate": 1.0848951981112016e-06, |
| "loss": 0.5814, |
| "step": 2699 |
| }, |
| { |
| "epoch": 0.07878955892437661, |
| "grad_norm": 0.3582421839237213, |
| "learning_rate": 1.0820291554146522e-06, |
| "loss": 0.4809, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.07881874024249676, |
| "grad_norm": 0.30551865696907043, |
| "learning_rate": 1.079166444040367e-06, |
| "loss": 0.2946, |
| "step": 2701 |
| }, |
| { |
| "epoch": 0.07884792156061689, |
| "grad_norm": 0.3131472170352936, |
| "learning_rate": 1.0763070664224145e-06, |
| "loss": 1.0246, |
| "step": 2702 |
| }, |
| { |
| "epoch": 0.07887710287873703, |
| "grad_norm": 0.510502278804779, |
| "learning_rate": 1.0734510249920316e-06, |
| "loss": 0.9957, |
| "step": 2703 |
| }, |
| { |
| "epoch": 0.07890628419685718, |
| "grad_norm": 0.284952312707901, |
| "learning_rate": 1.0705983221776173e-06, |
| "loss": 0.9478, |
| "step": 2704 |
| }, |
| { |
| "epoch": 0.07893546551497731, |
| "grad_norm": 0.4157751798629761, |
| "learning_rate": 1.0677489604047292e-06, |
| "loss": 0.7175, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.07896464683309745, |
| "grad_norm": 0.5279375910758972, |
| "learning_rate": 1.0649029420960894e-06, |
| "loss": 0.6313, |
| "step": 2706 |
| }, |
| { |
| "epoch": 0.0789938281512176, |
| "grad_norm": 0.3475935459136963, |
| "learning_rate": 1.0620602696715715e-06, |
| "loss": 0.7483, |
| "step": 2707 |
| }, |
| { |
| "epoch": 0.07902300946933773, |
| "grad_norm": 0.6905449032783508, |
| "learning_rate": 1.059220945548206e-06, |
| "loss": 0.6429, |
| "step": 2708 |
| }, |
| { |
| "epoch": 0.07905219078745787, |
| "grad_norm": 0.3662562370300293, |
| "learning_rate": 1.0563849721401792e-06, |
| "loss": 0.7772, |
| "step": 2709 |
| }, |
| { |
| "epoch": 0.07908137210557802, |
| "grad_norm": 0.3438391089439392, |
| "learning_rate": 1.053552351858827e-06, |
| "loss": 0.3878, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.07911055342369815, |
| "grad_norm": 0.35798415541648865, |
| "learning_rate": 1.0507230871126312e-06, |
| "loss": 0.5366, |
| "step": 2711 |
| }, |
| { |
| "epoch": 0.07913973474181829, |
| "grad_norm": 0.3710290193557739, |
| "learning_rate": 1.0478971803072253e-06, |
| "loss": 0.6039, |
| "step": 2712 |
| }, |
| { |
| "epoch": 0.07916891605993842, |
| "grad_norm": 0.39147600531578064, |
| "learning_rate": 1.0450746338453837e-06, |
| "loss": 0.6001, |
| "step": 2713 |
| }, |
| { |
| "epoch": 0.07919809737805857, |
| "grad_norm": 0.37071356177330017, |
| "learning_rate": 1.0422554501270248e-06, |
| "loss": 0.5448, |
| "step": 2714 |
| }, |
| { |
| "epoch": 0.07922727869617871, |
| "grad_norm": 0.42035678029060364, |
| "learning_rate": 1.039439631549209e-06, |
| "loss": 1.5034, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.07925646001429884, |
| "grad_norm": 0.32633477449417114, |
| "learning_rate": 1.0366271805061361e-06, |
| "loss": 0.535, |
| "step": 2716 |
| }, |
| { |
| "epoch": 0.07928564133241899, |
| "grad_norm": 0.34279513359069824, |
| "learning_rate": 1.0338180993891378e-06, |
| "loss": 1.0436, |
| "step": 2717 |
| }, |
| { |
| "epoch": 0.07931482265053913, |
| "grad_norm": 0.41138994693756104, |
| "learning_rate": 1.031012390586687e-06, |
| "loss": 1.199, |
| "step": 2718 |
| }, |
| { |
| "epoch": 0.07934400396865926, |
| "grad_norm": 0.3948320746421814, |
| "learning_rate": 1.028210056484385e-06, |
| "loss": 1.019, |
| "step": 2719 |
| }, |
| { |
| "epoch": 0.0793731852867794, |
| "grad_norm": 0.6519201993942261, |
| "learning_rate": 1.0254110994649625e-06, |
| "loss": 1.4876, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.07940236660489955, |
| "grad_norm": 0.3915137052536011, |
| "learning_rate": 1.0226155219082828e-06, |
| "loss": 0.4505, |
| "step": 2721 |
| }, |
| { |
| "epoch": 0.07943154792301968, |
| "grad_norm": 0.3932513892650604, |
| "learning_rate": 1.0198233261913349e-06, |
| "loss": 0.5735, |
| "step": 2722 |
| }, |
| { |
| "epoch": 0.07946072924113982, |
| "grad_norm": 0.3603682219982147, |
| "learning_rate": 1.0170345146882304e-06, |
| "loss": 0.4167, |
| "step": 2723 |
| }, |
| { |
| "epoch": 0.07948991055925997, |
| "grad_norm": 0.3622930347919464, |
| "learning_rate": 1.0142490897702035e-06, |
| "loss": 1.4315, |
| "step": 2724 |
| }, |
| { |
| "epoch": 0.0795190918773801, |
| "grad_norm": 0.3370799124240875, |
| "learning_rate": 1.011467053805611e-06, |
| "loss": 0.4501, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.07954827319550024, |
| "grad_norm": 0.34137091040611267, |
| "learning_rate": 1.0086884091599253e-06, |
| "loss": 1.6899, |
| "step": 2726 |
| }, |
| { |
| "epoch": 0.07957745451362037, |
| "grad_norm": 0.715193510055542, |
| "learning_rate": 1.0059131581957382e-06, |
| "loss": 0.599, |
| "step": 2727 |
| }, |
| { |
| "epoch": 0.07960663583174052, |
| "grad_norm": 0.3690969944000244, |
| "learning_rate": 1.003141303272756e-06, |
| "loss": 0.9911, |
| "step": 2728 |
| }, |
| { |
| "epoch": 0.07963581714986066, |
| "grad_norm": 0.580677330493927, |
| "learning_rate": 1.000372846747794e-06, |
| "loss": 1.0701, |
| "step": 2729 |
| }, |
| { |
| "epoch": 0.0796649984679808, |
| "grad_norm": 0.3324771225452423, |
| "learning_rate": 9.976077909747806e-07, |
| "loss": 1.5002, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.07969417978610094, |
| "grad_norm": 0.36764955520629883, |
| "learning_rate": 9.948461383047526e-07, |
| "loss": 0.522, |
| "step": 2731 |
| }, |
| { |
| "epoch": 0.07972336110422108, |
| "grad_norm": 0.3746655285358429, |
| "learning_rate": 9.920878910858527e-07, |
| "loss": 1.2669, |
| "step": 2732 |
| }, |
| { |
| "epoch": 0.07975254242234121, |
| "grad_norm": 0.3026464879512787, |
| "learning_rate": 9.89333051663327e-07, |
| "loss": 0.4806, |
| "step": 2733 |
| }, |
| { |
| "epoch": 0.07978172374046136, |
| "grad_norm": 0.37297871708869934, |
| "learning_rate": 9.86581622379526e-07, |
| "loss": 0.928, |
| "step": 2734 |
| }, |
| { |
| "epoch": 0.0798109050585815, |
| "grad_norm": 0.28337720036506653, |
| "learning_rate": 9.838336055739017e-07, |
| "loss": 0.8598, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.07984008637670163, |
| "grad_norm": 0.3750733733177185, |
| "learning_rate": 9.810890035829995e-07, |
| "loss": 0.5293, |
| "step": 2736 |
| }, |
| { |
| "epoch": 0.07986926769482178, |
| "grad_norm": 0.3876027464866638, |
| "learning_rate": 9.783478187404678e-07, |
| "loss": 0.5494, |
| "step": 2737 |
| }, |
| { |
| "epoch": 0.07989844901294191, |
| "grad_norm": 0.36022207140922546, |
| "learning_rate": 9.756100533770446e-07, |
| "loss": 1.1829, |
| "step": 2738 |
| }, |
| { |
| "epoch": 0.07992763033106205, |
| "grad_norm": 0.46664199233055115, |
| "learning_rate": 9.72875709820561e-07, |
| "loss": 0.6604, |
| "step": 2739 |
| }, |
| { |
| "epoch": 0.0799568116491822, |
| "grad_norm": 0.338044136762619, |
| "learning_rate": 9.701447903959404e-07, |
| "loss": 0.9541, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.07998599296730233, |
| "grad_norm": 0.6823078989982605, |
| "learning_rate": 9.674172974251966e-07, |
| "loss": 0.3485, |
| "step": 2741 |
| }, |
| { |
| "epoch": 0.08001517428542247, |
| "grad_norm": 0.433267205953598, |
| "learning_rate": 9.64693233227425e-07, |
| "loss": 0.8906, |
| "step": 2742 |
| }, |
| { |
| "epoch": 0.08004435560354262, |
| "grad_norm": 0.4067397117614746, |
| "learning_rate": 9.619726001188078e-07, |
| "loss": 0.6399, |
| "step": 2743 |
| }, |
| { |
| "epoch": 0.08007353692166275, |
| "grad_norm": 0.38058000802993774, |
| "learning_rate": 9.592554004126125e-07, |
| "loss": 0.3394, |
| "step": 2744 |
| }, |
| { |
| "epoch": 0.08010271823978289, |
| "grad_norm": 0.4159359633922577, |
| "learning_rate": 9.565416364191825e-07, |
| "loss": 0.5434, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.08013189955790304, |
| "grad_norm": 0.37545689940452576, |
| "learning_rate": 9.538313104459435e-07, |
| "loss": 1.0532, |
| "step": 2746 |
| }, |
| { |
| "epoch": 0.08016108087602317, |
| "grad_norm": 0.5913337469100952, |
| "learning_rate": 9.511244247973983e-07, |
| "loss": 0.5298, |
| "step": 2747 |
| }, |
| { |
| "epoch": 0.08019026219414331, |
| "grad_norm": 0.36186471581459045, |
| "learning_rate": 9.484209817751216e-07, |
| "loss": 0.7762, |
| "step": 2748 |
| }, |
| { |
| "epoch": 0.08021944351226346, |
| "grad_norm": 0.3885456919670105, |
| "learning_rate": 9.457209836777615e-07, |
| "loss": 0.7194, |
| "step": 2749 |
| }, |
| { |
| "epoch": 0.08024862483038359, |
| "grad_norm": 0.3861290514469147, |
| "learning_rate": 9.430244328010408e-07, |
| "loss": 0.5904, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.08027780614850373, |
| "grad_norm": 0.33283811807632446, |
| "learning_rate": 9.403313314377444e-07, |
| "loss": 1.1152, |
| "step": 2751 |
| }, |
| { |
| "epoch": 0.08030698746662386, |
| "grad_norm": 0.35944461822509766, |
| "learning_rate": 9.376416818777311e-07, |
| "loss": 0.5928, |
| "step": 2752 |
| }, |
| { |
| "epoch": 0.080336168784744, |
| "grad_norm": 0.39709368348121643, |
| "learning_rate": 9.349554864079219e-07, |
| "loss": 0.5727, |
| "step": 2753 |
| }, |
| { |
| "epoch": 0.08036535010286415, |
| "grad_norm": 0.3878830671310425, |
| "learning_rate": 9.322727473123005e-07, |
| "loss": 0.8218, |
| "step": 2754 |
| }, |
| { |
| "epoch": 0.08039453142098428, |
| "grad_norm": 0.406269907951355, |
| "learning_rate": 9.295934668719103e-07, |
| "loss": 0.5407, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.08042371273910442, |
| "grad_norm": 0.33966565132141113, |
| "learning_rate": 9.269176473648578e-07, |
| "loss": 0.4675, |
| "step": 2756 |
| }, |
| { |
| "epoch": 0.08045289405722457, |
| "grad_norm": 0.3084021210670471, |
| "learning_rate": 9.242452910663041e-07, |
| "loss": 0.4255, |
| "step": 2757 |
| }, |
| { |
| "epoch": 0.0804820753753447, |
| "grad_norm": 0.5439592003822327, |
| "learning_rate": 9.215764002484639e-07, |
| "loss": 1.5691, |
| "step": 2758 |
| }, |
| { |
| "epoch": 0.08051125669346484, |
| "grad_norm": 0.35782378911972046, |
| "learning_rate": 9.189109771806115e-07, |
| "loss": 1.6391, |
| "step": 2759 |
| }, |
| { |
| "epoch": 0.08054043801158499, |
| "grad_norm": 0.3929624855518341, |
| "learning_rate": 9.162490241290678e-07, |
| "loss": 0.5262, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.08056961932970512, |
| "grad_norm": 0.4435413181781769, |
| "learning_rate": 9.135905433572029e-07, |
| "loss": 0.6524, |
| "step": 2761 |
| }, |
| { |
| "epoch": 0.08059880064782526, |
| "grad_norm": 0.3423488736152649, |
| "learning_rate": 9.109355371254358e-07, |
| "loss": 0.5099, |
| "step": 2762 |
| }, |
| { |
| "epoch": 0.08062798196594541, |
| "grad_norm": 0.3287017345428467, |
| "learning_rate": 9.08284007691233e-07, |
| "loss": 0.9522, |
| "step": 2763 |
| }, |
| { |
| "epoch": 0.08065716328406554, |
| "grad_norm": 0.40046530961990356, |
| "learning_rate": 9.056359573091006e-07, |
| "loss": 0.6298, |
| "step": 2764 |
| }, |
| { |
| "epoch": 0.08068634460218568, |
| "grad_norm": 0.44617435336112976, |
| "learning_rate": 9.029913882305912e-07, |
| "loss": 0.8594, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.08071552592030581, |
| "grad_norm": 0.40023496747016907, |
| "learning_rate": 9.00350302704296e-07, |
| "loss": 1.0644, |
| "step": 2766 |
| }, |
| { |
| "epoch": 0.08074470723842596, |
| "grad_norm": 0.4678109884262085, |
| "learning_rate": 8.97712702975842e-07, |
| "loss": 0.6731, |
| "step": 2767 |
| }, |
| { |
| "epoch": 0.0807738885565461, |
| "grad_norm": 0.3775061070919037, |
| "learning_rate": 8.950785912878929e-07, |
| "loss": 0.6218, |
| "step": 2768 |
| }, |
| { |
| "epoch": 0.08080306987466623, |
| "grad_norm": 0.3524286150932312, |
| "learning_rate": 8.924479698801497e-07, |
| "loss": 0.5255, |
| "step": 2769 |
| }, |
| { |
| "epoch": 0.08083225119278638, |
| "grad_norm": 0.42107194662094116, |
| "learning_rate": 8.898208409893421e-07, |
| "loss": 1.1148, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.08086143251090652, |
| "grad_norm": 0.4046686887741089, |
| "learning_rate": 8.871972068492318e-07, |
| "loss": 0.7876, |
| "step": 2771 |
| }, |
| { |
| "epoch": 0.08089061382902665, |
| "grad_norm": 0.31776970624923706, |
| "learning_rate": 8.845770696906108e-07, |
| "loss": 0.3291, |
| "step": 2772 |
| }, |
| { |
| "epoch": 0.0809197951471468, |
| "grad_norm": 0.4083385765552521, |
| "learning_rate": 8.81960431741295e-07, |
| "loss": 0.5866, |
| "step": 2773 |
| }, |
| { |
| "epoch": 0.08094897646526694, |
| "grad_norm": 0.35289865732192993, |
| "learning_rate": 8.793472952261239e-07, |
| "loss": 0.6165, |
| "step": 2774 |
| }, |
| { |
| "epoch": 0.08097815778338707, |
| "grad_norm": 0.40025609731674194, |
| "learning_rate": 8.767376623669644e-07, |
| "loss": 0.6788, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.08100733910150722, |
| "grad_norm": 0.3427574336528778, |
| "learning_rate": 8.741315353827001e-07, |
| "loss": 0.514, |
| "step": 2776 |
| }, |
| { |
| "epoch": 0.08103652041962736, |
| "grad_norm": 0.4109432101249695, |
| "learning_rate": 8.71528916489236e-07, |
| "loss": 0.5295, |
| "step": 2777 |
| }, |
| { |
| "epoch": 0.08106570173774749, |
| "grad_norm": 0.37953630089759827, |
| "learning_rate": 8.689298078994957e-07, |
| "loss": 1.5582, |
| "step": 2778 |
| }, |
| { |
| "epoch": 0.08109488305586764, |
| "grad_norm": 0.341184139251709, |
| "learning_rate": 8.663342118234136e-07, |
| "loss": 0.9897, |
| "step": 2779 |
| }, |
| { |
| "epoch": 0.08112406437398777, |
| "grad_norm": 0.39856502413749695, |
| "learning_rate": 8.6374213046794e-07, |
| "loss": 0.6733, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.08115324569210791, |
| "grad_norm": 0.4532153308391571, |
| "learning_rate": 8.611535660370384e-07, |
| "loss": 1.1734, |
| "step": 2781 |
| }, |
| { |
| "epoch": 0.08118242701022806, |
| "grad_norm": 0.3897307217121124, |
| "learning_rate": 8.585685207316785e-07, |
| "loss": 0.964, |
| "step": 2782 |
| }, |
| { |
| "epoch": 0.08121160832834819, |
| "grad_norm": 0.38897618651390076, |
| "learning_rate": 8.559869967498413e-07, |
| "loss": 0.8199, |
| "step": 2783 |
| }, |
| { |
| "epoch": 0.08124078964646833, |
| "grad_norm": 0.42664968967437744, |
| "learning_rate": 8.534089962865106e-07, |
| "loss": 0.6221, |
| "step": 2784 |
| }, |
| { |
| "epoch": 0.08126997096458848, |
| "grad_norm": 0.3721056878566742, |
| "learning_rate": 8.508345215336772e-07, |
| "loss": 0.4788, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.0812991522827086, |
| "grad_norm": 0.400827556848526, |
| "learning_rate": 8.482635746803325e-07, |
| "loss": 0.9184, |
| "step": 2786 |
| }, |
| { |
| "epoch": 0.08132833360082875, |
| "grad_norm": 0.37725603580474854, |
| "learning_rate": 8.456961579124662e-07, |
| "loss": 0.592, |
| "step": 2787 |
| }, |
| { |
| "epoch": 0.0813575149189489, |
| "grad_norm": 0.3344900608062744, |
| "learning_rate": 8.43132273413072e-07, |
| "loss": 0.971, |
| "step": 2788 |
| }, |
| { |
| "epoch": 0.08138669623706903, |
| "grad_norm": 0.42251962423324585, |
| "learning_rate": 8.405719233621334e-07, |
| "loss": 0.8792, |
| "step": 2789 |
| }, |
| { |
| "epoch": 0.08141587755518917, |
| "grad_norm": 0.37171322107315063, |
| "learning_rate": 8.38015109936634e-07, |
| "loss": 1.0705, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.0814450588733093, |
| "grad_norm": 0.46311017870903015, |
| "learning_rate": 8.354618353105504e-07, |
| "loss": 1.4724, |
| "step": 2791 |
| }, |
| { |
| "epoch": 0.08147424019142945, |
| "grad_norm": 0.3928869068622589, |
| "learning_rate": 8.329121016548458e-07, |
| "loss": 1.1284, |
| "step": 2792 |
| }, |
| { |
| "epoch": 0.08150342150954959, |
| "grad_norm": 0.4900544583797455, |
| "learning_rate": 8.303659111374745e-07, |
| "loss": 0.6326, |
| "step": 2793 |
| }, |
| { |
| "epoch": 0.08153260282766972, |
| "grad_norm": 0.3824761211872101, |
| "learning_rate": 8.27823265923382e-07, |
| "loss": 0.6771, |
| "step": 2794 |
| }, |
| { |
| "epoch": 0.08156178414578986, |
| "grad_norm": 0.3180493116378784, |
| "learning_rate": 8.252841681744933e-07, |
| "loss": 0.6416, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.08159096546391001, |
| "grad_norm": 0.32588139176368713, |
| "learning_rate": 8.227486200497209e-07, |
| "loss": 1.0704, |
| "step": 2796 |
| }, |
| { |
| "epoch": 0.08162014678203014, |
| "grad_norm": 0.35677462816238403, |
| "learning_rate": 8.202166237049597e-07, |
| "loss": 0.6392, |
| "step": 2797 |
| }, |
| { |
| "epoch": 0.08164932810015028, |
| "grad_norm": 0.3788210153579712, |
| "learning_rate": 8.176881812930825e-07, |
| "loss": 1.0492, |
| "step": 2798 |
| }, |
| { |
| "epoch": 0.08167850941827043, |
| "grad_norm": 0.4090243875980377, |
| "learning_rate": 8.151632949639394e-07, |
| "loss": 0.8022, |
| "step": 2799 |
| }, |
| { |
| "epoch": 0.08170769073639056, |
| "grad_norm": 0.5039314031600952, |
| "learning_rate": 8.12641966864361e-07, |
| "loss": 1.0773, |
| "step": 2800 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 3427, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 400, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.886727755412275e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |