| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 6.0, |
| "eval_steps": 50, |
| "global_step": 246, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02476780185758514, |
| "grad_norm": 0.5827791094779968, |
| "learning_rate": 0.0, |
| "loss": 0.5993257761001587, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.04953560371517028, |
| "grad_norm": 0.5781313180923462, |
| "learning_rate": 2.5e-07, |
| "loss": 0.5510573387145996, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.07430340557275542, |
| "grad_norm": 0.5830345153808594, |
| "learning_rate": 5e-07, |
| "loss": 0.500480055809021, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.09907120743034056, |
| "grad_norm": 0.5189770460128784, |
| "learning_rate": 7.5e-07, |
| "loss": 0.5299410820007324, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.1238390092879257, |
| "grad_norm": 0.520061194896698, |
| "learning_rate": 1e-06, |
| "loss": 0.5539457201957703, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.14860681114551083, |
| "grad_norm": 0.5419376492500305, |
| "learning_rate": 1.2499999999999999e-06, |
| "loss": 0.5408970713615417, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.17337461300309598, |
| "grad_norm": 0.5576385855674744, |
| "learning_rate": 1.5e-06, |
| "loss": 0.5969724655151367, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.19814241486068113, |
| "grad_norm": 0.5351932048797607, |
| "learning_rate": 1.75e-06, |
| "loss": 0.5394197106361389, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.22291021671826625, |
| "grad_norm": 0.4773852527141571, |
| "learning_rate": 2e-06, |
| "loss": 0.5735222101211548, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.2476780185758514, |
| "grad_norm": 0.5032294392585754, |
| "learning_rate": 1.9999128816724105e-06, |
| "loss": 0.5828520059585571, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.2724458204334365, |
| "grad_norm": 0.49014607071876526, |
| "learning_rate": 1.9996515418688487e-06, |
| "loss": 0.5568044781684875, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.29721362229102166, |
| "grad_norm": 0.5634818077087402, |
| "learning_rate": 1.9992160261242874e-06, |
| "loss": 0.5982780456542969, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.3219814241486068, |
| "grad_norm": 0.4928373396396637, |
| "learning_rate": 1.9986064103215337e-06, |
| "loss": 0.563035249710083, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.34674922600619196, |
| "grad_norm": 0.5265209674835205, |
| "learning_rate": 1.9978228006780053e-06, |
| "loss": 0.588450014591217, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.3715170278637771, |
| "grad_norm": 0.4966702461242676, |
| "learning_rate": 1.996865333727226e-06, |
| "loss": 0.5518300533294678, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.39628482972136225, |
| "grad_norm": 0.5559803247451782, |
| "learning_rate": 1.9957341762950344e-06, |
| "loss": 0.5778566002845764, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.42105263157894735, |
| "grad_norm": 0.5569736957550049, |
| "learning_rate": 1.9944295254705185e-06, |
| "loss": 0.556509256362915, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.4458204334365325, |
| "grad_norm": 0.5971181988716125, |
| "learning_rate": 1.992951608571673e-06, |
| "loss": 0.5314251780509949, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 0.529690146446228, |
| "learning_rate": 1.9913006831057965e-06, |
| "loss": 0.5227062702178955, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.4953560371517028, |
| "grad_norm": 0.6401184797286987, |
| "learning_rate": 1.989477036724619e-06, |
| "loss": 0.5782433152198792, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.5201238390092879, |
| "grad_norm": 0.539941132068634, |
| "learning_rate": 1.9874809871741874e-06, |
| "loss": 0.5736757516860962, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.544891640866873, |
| "grad_norm": 0.5726771950721741, |
| "learning_rate": 1.9853128822394975e-06, |
| "loss": 0.5858570337295532, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.5696594427244582, |
| "grad_norm": 0.55902498960495, |
| "learning_rate": 1.982973099683902e-06, |
| "loss": 0.5574871301651001, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.5944272445820433, |
| "grad_norm": 0.527619481086731, |
| "learning_rate": 1.9804620471832865e-06, |
| "loss": 0.5171317458152771, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.6191950464396285, |
| "grad_norm": 0.5026052594184875, |
| "learning_rate": 1.9777801622550405e-06, |
| "loss": 0.5416678190231323, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.6439628482972136, |
| "grad_norm": 0.47064998745918274, |
| "learning_rate": 1.9749279121818236e-06, |
| "loss": 0.5682564973831177, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.6687306501547987, |
| "grad_norm": 0.5842341184616089, |
| "learning_rate": 1.9719057939301475e-06, |
| "loss": 0.5644649267196655, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.6934984520123839, |
| "grad_norm": 0.49904075264930725, |
| "learning_rate": 1.9687143340637884e-06, |
| "loss": 0.5811545252799988, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.718266253869969, |
| "grad_norm": 1.2309396266937256, |
| "learning_rate": 1.9653540886520385e-06, |
| "loss": 0.605437695980072, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.7430340557275542, |
| "grad_norm": 0.5156847834587097, |
| "learning_rate": 1.9618256431728192e-06, |
| "loss": 0.5422309637069702, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.7678018575851393, |
| "grad_norm": 0.6013903617858887, |
| "learning_rate": 1.958129612410668e-06, |
| "loss": 0.54377281665802, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.7925696594427245, |
| "grad_norm": 0.5307015180587769, |
| "learning_rate": 1.954266640349623e-06, |
| "loss": 0.5074729919433594, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.8173374613003096, |
| "grad_norm": 0.5950272679328918, |
| "learning_rate": 1.950237400061015e-06, |
| "loss": 0.5290631055831909, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 0.5664405226707458, |
| "learning_rate": 1.9460425935861946e-06, |
| "loss": 0.600000262260437, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.8668730650154799, |
| "grad_norm": 0.5338588953018188, |
| "learning_rate": 1.9416829518142113e-06, |
| "loss": 0.5680241584777832, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.891640866873065, |
| "grad_norm": 0.5495931506156921, |
| "learning_rate": 1.9371592343544655e-06, |
| "loss": 0.5304821729660034, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.9164086687306502, |
| "grad_norm": 0.47950977087020874, |
| "learning_rate": 1.932472229404356e-06, |
| "loss": 0.5156245827674866, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 0.5299308896064758, |
| "learning_rate": 1.9276227536119477e-06, |
| "loss": 0.5732549428939819, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.9659442724458205, |
| "grad_norm": 0.5737171173095703, |
| "learning_rate": 1.9226116519336828e-06, |
| "loss": 0.5309604406356812, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.9907120743034056, |
| "grad_norm": 0.693321168422699, |
| "learning_rate": 1.917439797487156e-06, |
| "loss": 0.5797507762908936, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.9561907649040222, |
| "learning_rate": 1.9121080913989878e-06, |
| "loss": 0.5909802913665771, |
| "step": 41 |
| }, |
| { |
| "epoch": 1.0247678018575852, |
| "grad_norm": 0.6066501140594482, |
| "learning_rate": 1.9066174626478126e-06, |
| "loss": 0.6078804135322571, |
| "step": 42 |
| }, |
| { |
| "epoch": 1.0495356037151702, |
| "grad_norm": 0.5243707299232483, |
| "learning_rate": 1.9009688679024189e-06, |
| "loss": 0.5241413116455078, |
| "step": 43 |
| }, |
| { |
| "epoch": 1.0743034055727554, |
| "grad_norm": 0.5240072011947632, |
| "learning_rate": 1.8951632913550625e-06, |
| "loss": 0.5645661950111389, |
| "step": 44 |
| }, |
| { |
| "epoch": 1.0990712074303406, |
| "grad_norm": 0.6983147263526917, |
| "learning_rate": 1.889201744549981e-06, |
| "loss": 0.5029958486557007, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.1238390092879258, |
| "grad_norm": 0.6109921932220459, |
| "learning_rate": 1.8830852662071505e-06, |
| "loss": 0.5748687386512756, |
| "step": 46 |
| }, |
| { |
| "epoch": 1.1486068111455108, |
| "grad_norm": 0.5242897868156433, |
| "learning_rate": 1.8768149220412987e-06, |
| "loss": 0.5576164722442627, |
| "step": 47 |
| }, |
| { |
| "epoch": 1.173374613003096, |
| "grad_norm": 0.5376689434051514, |
| "learning_rate": 1.8703918045762194e-06, |
| "loss": 0.5489684343338013, |
| "step": 48 |
| }, |
| { |
| "epoch": 1.1981424148606812, |
| "grad_norm": 0.5369903445243835, |
| "learning_rate": 1.863817032954416e-06, |
| "loss": 0.5305777192115784, |
| "step": 49 |
| }, |
| { |
| "epoch": 1.2229102167182662, |
| "grad_norm": 0.482452392578125, |
| "learning_rate": 1.8570917527421045e-06, |
| "loss": 0.4907306134700775, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.2229102167182662, |
| "eval_accuracy": 0.8213776795920542, |
| "eval_loss": 0.5645560622215271, |
| "eval_runtime": 16.7311, |
| "eval_samples_per_second": 4.064, |
| "eval_steps_per_second": 2.032, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.2476780185758514, |
| "grad_norm": 0.5009844899177551, |
| "learning_rate": 1.8502171357296142e-06, |
| "loss": 0.5544570088386536, |
| "step": 51 |
| }, |
| { |
| "epoch": 1.2724458204334366, |
| "grad_norm": 0.5807215571403503, |
| "learning_rate": 1.8431943797272185e-06, |
| "loss": 0.5804014205932617, |
| "step": 52 |
| }, |
| { |
| "epoch": 1.2972136222910216, |
| "grad_norm": 0.5564484596252441, |
| "learning_rate": 1.836024708356434e-06, |
| "loss": 0.5661737322807312, |
| "step": 53 |
| }, |
| { |
| "epoch": 1.3219814241486068, |
| "grad_norm": 0.5095818042755127, |
| "learning_rate": 1.8287093708368186e-06, |
| "loss": 0.5299423336982727, |
| "step": 54 |
| }, |
| { |
| "epoch": 1.346749226006192, |
| "grad_norm": 0.5763193368911743, |
| "learning_rate": 1.8212496417683135e-06, |
| "loss": 0.5352605581283569, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.3715170278637772, |
| "grad_norm": 0.5195797681808472, |
| "learning_rate": 1.81364682090916e-06, |
| "loss": 0.530654788017273, |
| "step": 56 |
| }, |
| { |
| "epoch": 1.3962848297213624, |
| "grad_norm": 0.5399932861328125, |
| "learning_rate": 1.805902232949435e-06, |
| "loss": 0.5673707723617554, |
| "step": 57 |
| }, |
| { |
| "epoch": 1.4210526315789473, |
| "grad_norm": 0.5126509666442871, |
| "learning_rate": 1.7980172272802397e-06, |
| "loss": 0.5673764944076538, |
| "step": 58 |
| }, |
| { |
| "epoch": 1.4458204334365325, |
| "grad_norm": 0.5293602347373962, |
| "learning_rate": 1.789993177758588e-06, |
| "loss": 0.5548557043075562, |
| "step": 59 |
| }, |
| { |
| "epoch": 1.4705882352941178, |
| "grad_norm": 0.47508999705314636, |
| "learning_rate": 1.7818314824680298e-06, |
| "loss": 0.5592916011810303, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.4953560371517027, |
| "grad_norm": 0.506854236125946, |
| "learning_rate": 1.773533563475053e-06, |
| "loss": 0.5494035482406616, |
| "step": 61 |
| }, |
| { |
| "epoch": 1.520123839009288, |
| "grad_norm": 0.6375800371170044, |
| "learning_rate": 1.7651008665813081e-06, |
| "loss": 0.5607191324234009, |
| "step": 62 |
| }, |
| { |
| "epoch": 1.5448916408668731, |
| "grad_norm": 0.4859982132911682, |
| "learning_rate": 1.7565348610716958e-06, |
| "loss": 0.5413356423377991, |
| "step": 63 |
| }, |
| { |
| "epoch": 1.5696594427244581, |
| "grad_norm": 0.5644744634628296, |
| "learning_rate": 1.7478370394583643e-06, |
| "loss": 0.5568721294403076, |
| "step": 64 |
| }, |
| { |
| "epoch": 1.5944272445820433, |
| "grad_norm": 0.5623730421066284, |
| "learning_rate": 1.739008917220659e-06, |
| "loss": 0.5305633544921875, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.6191950464396285, |
| "grad_norm": 0.46600863337516785, |
| "learning_rate": 1.7300520325410698e-06, |
| "loss": 0.519407331943512, |
| "step": 66 |
| }, |
| { |
| "epoch": 1.6439628482972135, |
| "grad_norm": 0.5476927161216736, |
| "learning_rate": 1.7209679460372249e-06, |
| "loss": 0.5438145399093628, |
| "step": 67 |
| }, |
| { |
| "epoch": 1.6687306501547987, |
| "grad_norm": 0.5339446663856506, |
| "learning_rate": 1.711758240489971e-06, |
| "loss": 0.5288221836090088, |
| "step": 68 |
| }, |
| { |
| "epoch": 1.693498452012384, |
| "grad_norm": 0.4474664628505707, |
| "learning_rate": 1.7024245205675985e-06, |
| "loss": 0.5665724277496338, |
| "step": 69 |
| }, |
| { |
| "epoch": 1.718266253869969, |
| "grad_norm": 0.5247179865837097, |
| "learning_rate": 1.6929684125462468e-06, |
| "loss": 0.5420582294464111, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.7430340557275543, |
| "grad_norm": 0.6573188304901123, |
| "learning_rate": 1.6833915640265483e-06, |
| "loss": 0.538118839263916, |
| "step": 71 |
| }, |
| { |
| "epoch": 1.7678018575851393, |
| "grad_norm": 0.5430057644844055, |
| "learning_rate": 1.6736956436465573e-06, |
| "loss": 0.5287379026412964, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.7925696594427245, |
| "grad_norm": 1.451054334640503, |
| "learning_rate": 1.6638823407910082e-06, |
| "loss": 0.5065432190895081, |
| "step": 73 |
| }, |
| { |
| "epoch": 1.8173374613003097, |
| "grad_norm": 1.7800654172897339, |
| "learning_rate": 1.6539533652969682e-06, |
| "loss": 0.5422472357749939, |
| "step": 74 |
| }, |
| { |
| "epoch": 1.8421052631578947, |
| "grad_norm": 0.5204485654830933, |
| "learning_rate": 1.6439104471559156e-06, |
| "loss": 0.4941398501396179, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.86687306501548, |
| "grad_norm": 0.4798074960708618, |
| "learning_rate": 1.6337553362123161e-06, |
| "loss": 0.5543307065963745, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.891640866873065, |
| "grad_norm": 0.4639158248901367, |
| "learning_rate": 1.6234898018587336e-06, |
| "loss": 0.5305337905883789, |
| "step": 77 |
| }, |
| { |
| "epoch": 1.91640866873065, |
| "grad_norm": 0.4957791566848755, |
| "learning_rate": 1.613115632727537e-06, |
| "loss": 0.4810314178466797, |
| "step": 78 |
| }, |
| { |
| "epoch": 1.9411764705882353, |
| "grad_norm": 0.542951762676239, |
| "learning_rate": 1.6026346363792564e-06, |
| "loss": 0.5742234587669373, |
| "step": 79 |
| }, |
| { |
| "epoch": 1.9659442724458205, |
| "grad_norm": 0.518661618232727, |
| "learning_rate": 1.592048638987638e-06, |
| "loss": 0.5540245771408081, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.9907120743034055, |
| "grad_norm": 0.48943665623664856, |
| "learning_rate": 1.5813594850214597e-06, |
| "loss": 0.509993851184845, |
| "step": 81 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.8778729438781738, |
| "learning_rate": 1.570569036923155e-06, |
| "loss": 0.539715051651001, |
| "step": 82 |
| }, |
| { |
| "epoch": 2.024767801857585, |
| "grad_norm": 0.4994299113750458, |
| "learning_rate": 1.5596791747843082e-06, |
| "loss": 0.5089604258537292, |
| "step": 83 |
| }, |
| { |
| "epoch": 2.0495356037151704, |
| "grad_norm": 0.5828955173492432, |
| "learning_rate": 1.548691796018074e-06, |
| "loss": 0.5253075361251831, |
| "step": 84 |
| }, |
| { |
| "epoch": 2.0743034055727554, |
| "grad_norm": 0.5461580753326416, |
| "learning_rate": 1.5376088150285774e-06, |
| "loss": 0.5154924392700195, |
| "step": 85 |
| }, |
| { |
| "epoch": 2.0990712074303404, |
| "grad_norm": 0.965928852558136, |
| "learning_rate": 1.5264321628773558e-06, |
| "loss": 0.5028945803642273, |
| "step": 86 |
| }, |
| { |
| "epoch": 2.123839009287926, |
| "grad_norm": 0.45946890115737915, |
| "learning_rate": 1.5151637869468958e-06, |
| "loss": 0.5220765471458435, |
| "step": 87 |
| }, |
| { |
| "epoch": 2.1486068111455108, |
| "grad_norm": 0.4885327219963074, |
| "learning_rate": 1.5038056506013295e-06, |
| "loss": 0.5020776391029358, |
| "step": 88 |
| }, |
| { |
| "epoch": 2.173374613003096, |
| "grad_norm": 0.5246437191963196, |
| "learning_rate": 1.492359732844342e-06, |
| "loss": 0.46335524320602417, |
| "step": 89 |
| }, |
| { |
| "epoch": 2.198142414860681, |
| "grad_norm": 0.5331137180328369, |
| "learning_rate": 1.4808280279743591e-06, |
| "loss": 0.5037820339202881, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.222910216718266, |
| "grad_norm": 0.5505975484848022, |
| "learning_rate": 1.4692125452370662e-06, |
| "loss": 0.5359715223312378, |
| "step": 91 |
| }, |
| { |
| "epoch": 2.2476780185758516, |
| "grad_norm": 0.5390040278434753, |
| "learning_rate": 1.4575153084753232e-06, |
| "loss": 0.5337521433830261, |
| "step": 92 |
| }, |
| { |
| "epoch": 2.2724458204334366, |
| "grad_norm": 0.44791266322135925, |
| "learning_rate": 1.4457383557765383e-06, |
| "loss": 0.5155265927314758, |
| "step": 93 |
| }, |
| { |
| "epoch": 2.2972136222910216, |
| "grad_norm": 0.4978775382041931, |
| "learning_rate": 1.433883739117558e-06, |
| "loss": 0.4920554757118225, |
| "step": 94 |
| }, |
| { |
| "epoch": 2.321981424148607, |
| "grad_norm": 0.5269660353660583, |
| "learning_rate": 1.4219535240071376e-06, |
| "loss": 0.5533995628356934, |
| "step": 95 |
| }, |
| { |
| "epoch": 2.346749226006192, |
| "grad_norm": 0.4875043034553528, |
| "learning_rate": 1.4099497891260537e-06, |
| "loss": 0.523270845413208, |
| "step": 96 |
| }, |
| { |
| "epoch": 2.371517027863777, |
| "grad_norm": 0.5254143476486206, |
| "learning_rate": 1.3978746259649208e-06, |
| "loss": 0.5255824327468872, |
| "step": 97 |
| }, |
| { |
| "epoch": 2.3962848297213624, |
| "grad_norm": 0.5345160365104675, |
| "learning_rate": 1.3857301384597794e-06, |
| "loss": 0.5329371094703674, |
| "step": 98 |
| }, |
| { |
| "epoch": 2.4210526315789473, |
| "grad_norm": 0.46321746706962585, |
| "learning_rate": 1.3735184426255114e-06, |
| "loss": 0.5548917055130005, |
| "step": 99 |
| }, |
| { |
| "epoch": 2.4458204334365323, |
| "grad_norm": 0.5209585428237915, |
| "learning_rate": 1.3612416661871531e-06, |
| "loss": 0.5931960940361023, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.4458204334365323, |
| "eval_accuracy": 0.8215138901886158, |
| "eval_loss": 0.562470018863678, |
| "eval_runtime": 16.4711, |
| "eval_samples_per_second": 4.128, |
| "eval_steps_per_second": 2.064, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.4705882352941178, |
| "grad_norm": 0.483987033367157, |
| "learning_rate": 1.3489019482091667e-06, |
| "loss": 0.5425853133201599, |
| "step": 101 |
| }, |
| { |
| "epoch": 2.4953560371517027, |
| "grad_norm": 0.44485101103782654, |
| "learning_rate": 1.336501438722739e-06, |
| "loss": 0.5403157472610474, |
| "step": 102 |
| }, |
| { |
| "epoch": 2.5201238390092877, |
| "grad_norm": 0.5460787415504456, |
| "learning_rate": 1.324042298351166e-06, |
| "loss": 0.5747348666191101, |
| "step": 103 |
| }, |
| { |
| "epoch": 2.544891640866873, |
| "grad_norm": 0.45323142409324646, |
| "learning_rate": 1.3115266979333914e-06, |
| "loss": 0.5297880172729492, |
| "step": 104 |
| }, |
| { |
| "epoch": 2.569659442724458, |
| "grad_norm": 0.6902194619178772, |
| "learning_rate": 1.2989568181457702e-06, |
| "loss": 0.5073508024215698, |
| "step": 105 |
| }, |
| { |
| "epoch": 2.594427244582043, |
| "grad_norm": 0.5212258100509644, |
| "learning_rate": 1.2863348491221127e-06, |
| "loss": 0.5311723351478577, |
| "step": 106 |
| }, |
| { |
| "epoch": 2.6191950464396285, |
| "grad_norm": 0.5578774809837341, |
| "learning_rate": 1.273662990072083e-06, |
| "loss": 0.5304839015007019, |
| "step": 107 |
| }, |
| { |
| "epoch": 2.6439628482972135, |
| "grad_norm": 0.504798173904419, |
| "learning_rate": 1.2609434488980166e-06, |
| "loss": 0.4865831136703491, |
| "step": 108 |
| }, |
| { |
| "epoch": 2.6687306501547985, |
| "grad_norm": 0.4682161211967468, |
| "learning_rate": 1.2481784418102239e-06, |
| "loss": 0.5439316630363464, |
| "step": 109 |
| }, |
| { |
| "epoch": 2.693498452012384, |
| "grad_norm": 0.5871185064315796, |
| "learning_rate": 1.2353701929408424e-06, |
| "loss": 0.477615088224411, |
| "step": 110 |
| }, |
| { |
| "epoch": 2.718266253869969, |
| "grad_norm": 0.4735322594642639, |
| "learning_rate": 1.2225209339563143e-06, |
| "loss": 0.5605683326721191, |
| "step": 111 |
| }, |
| { |
| "epoch": 2.7430340557275543, |
| "grad_norm": 0.5656632781028748, |
| "learning_rate": 1.2096329036685466e-06, |
| "loss": 0.5053581595420837, |
| "step": 112 |
| }, |
| { |
| "epoch": 2.7678018575851393, |
| "grad_norm": 0.501797616481781, |
| "learning_rate": 1.196708347644828e-06, |
| "loss": 0.5080878734588623, |
| "step": 113 |
| }, |
| { |
| "epoch": 2.7925696594427247, |
| "grad_norm": 1.2063102722167969, |
| "learning_rate": 1.1837495178165704e-06, |
| "loss": 0.552485466003418, |
| "step": 114 |
| }, |
| { |
| "epoch": 2.8173374613003097, |
| "grad_norm": 0.5052933096885681, |
| "learning_rate": 1.1707586720869374e-06, |
| "loss": 0.5424617528915405, |
| "step": 115 |
| }, |
| { |
| "epoch": 2.8421052631578947, |
| "grad_norm": 0.5184856057167053, |
| "learning_rate": 1.1577380739374373e-06, |
| "loss": 0.5432671904563904, |
| "step": 116 |
| }, |
| { |
| "epoch": 2.86687306501548, |
| "grad_norm": 0.5071874260902405, |
| "learning_rate": 1.1446899920335405e-06, |
| "loss": 0.5507460832595825, |
| "step": 117 |
| }, |
| { |
| "epoch": 2.891640866873065, |
| "grad_norm": 0.519482433795929, |
| "learning_rate": 1.1316166998293935e-06, |
| "loss": 0.5559477210044861, |
| "step": 118 |
| }, |
| { |
| "epoch": 2.91640866873065, |
| "grad_norm": 0.5042552947998047, |
| "learning_rate": 1.1185204751717027e-06, |
| "loss": 0.5015457272529602, |
| "step": 119 |
| }, |
| { |
| "epoch": 2.9411764705882355, |
| "grad_norm": 1.3727635145187378, |
| "learning_rate": 1.1054035999028476e-06, |
| "loss": 0.5176253318786621, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.9659442724458205, |
| "grad_norm": 0.5206997990608215, |
| "learning_rate": 1.092268359463302e-06, |
| "loss": 0.5474892258644104, |
| "step": 121 |
| }, |
| { |
| "epoch": 2.9907120743034055, |
| "grad_norm": 0.472130686044693, |
| "learning_rate": 1.0791170424934246e-06, |
| "loss": 0.4985366463661194, |
| "step": 122 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.058793544769287, |
| "learning_rate": 1.0659519404346952e-06, |
| "loss": 0.48316121101379395, |
| "step": 123 |
| }, |
| { |
| "epoch": 3.024767801857585, |
| "grad_norm": 0.5421841740608215, |
| "learning_rate": 1.0527753471304623e-06, |
| "loss": 0.5144573450088501, |
| "step": 124 |
| }, |
| { |
| "epoch": 3.0495356037151704, |
| "grad_norm": 0.5197970271110535, |
| "learning_rate": 1.0395895584262695e-06, |
| "loss": 0.5817261934280396, |
| "step": 125 |
| }, |
| { |
| "epoch": 3.0743034055727554, |
| "grad_norm": 0.49334728717803955, |
| "learning_rate": 1.0263968717698363e-06, |
| "loss": 0.5018012523651123, |
| "step": 126 |
| }, |
| { |
| "epoch": 3.0990712074303404, |
| "grad_norm": 0.6232290267944336, |
| "learning_rate": 1.013199585810759e-06, |
| "loss": 0.5584498643875122, |
| "step": 127 |
| }, |
| { |
| "epoch": 3.123839009287926, |
| "grad_norm": 0.455437034368515, |
| "learning_rate": 1e-06, |
| "loss": 0.5036893486976624, |
| "step": 128 |
| }, |
| { |
| "epoch": 3.1486068111455108, |
| "grad_norm": 0.48946836590766907, |
| "learning_rate": 9.868004141892412e-07, |
| "loss": 0.5123312473297119, |
| "step": 129 |
| }, |
| { |
| "epoch": 3.173374613003096, |
| "grad_norm": 0.5698655843734741, |
| "learning_rate": 9.736031282301638e-07, |
| "loss": 0.5401725172996521, |
| "step": 130 |
| }, |
| { |
| "epoch": 3.198142414860681, |
| "grad_norm": 0.9283490180969238, |
| "learning_rate": 9.604104415737308e-07, |
| "loss": 0.48566514253616333, |
| "step": 131 |
| }, |
| { |
| "epoch": 3.222910216718266, |
| "grad_norm": 2.0157785415649414, |
| "learning_rate": 9.472246528695375e-07, |
| "loss": 0.4537651538848877, |
| "step": 132 |
| }, |
| { |
| "epoch": 3.2476780185758516, |
| "grad_norm": 0.5449803471565247, |
| "learning_rate": 9.340480595653045e-07, |
| "loss": 0.5530433654785156, |
| "step": 133 |
| }, |
| { |
| "epoch": 3.2724458204334366, |
| "grad_norm": 0.4725954532623291, |
| "learning_rate": 9.208829575065753e-07, |
| "loss": 0.5256283283233643, |
| "step": 134 |
| }, |
| { |
| "epoch": 3.2972136222910216, |
| "grad_norm": 0.4579267203807831, |
| "learning_rate": 9.077316405366981e-07, |
| "loss": 0.5190701484680176, |
| "step": 135 |
| }, |
| { |
| "epoch": 3.321981424148607, |
| "grad_norm": 0.544757604598999, |
| "learning_rate": 8.945964000971523e-07, |
| "loss": 0.5290215015411377, |
| "step": 136 |
| }, |
| { |
| "epoch": 3.346749226006192, |
| "grad_norm": 0.4990670084953308, |
| "learning_rate": 8.814795248282973e-07, |
| "loss": 0.5203908085823059, |
| "step": 137 |
| }, |
| { |
| "epoch": 3.371517027863777, |
| "grad_norm": 0.5583924651145935, |
| "learning_rate": 8.683833001706067e-07, |
| "loss": 0.499897837638855, |
| "step": 138 |
| }, |
| { |
| "epoch": 3.3962848297213624, |
| "grad_norm": 0.47875887155532837, |
| "learning_rate": 8.553100079664598e-07, |
| "loss": 0.4940932095050812, |
| "step": 139 |
| }, |
| { |
| "epoch": 3.4210526315789473, |
| "grad_norm": 0.4689862132072449, |
| "learning_rate": 8.422619260625624e-07, |
| "loss": 0.488369345664978, |
| "step": 140 |
| }, |
| { |
| "epoch": 3.4458204334365323, |
| "grad_norm": 0.5019742846488953, |
| "learning_rate": 8.292413279130624e-07, |
| "loss": 0.49827271699905396, |
| "step": 141 |
| }, |
| { |
| "epoch": 3.4705882352941178, |
| "grad_norm": 0.47474774718284607, |
| "learning_rate": 8.162504821834295e-07, |
| "loss": 0.5006945133209229, |
| "step": 142 |
| }, |
| { |
| "epoch": 3.4953560371517027, |
| "grad_norm": 0.5412342548370361, |
| "learning_rate": 8.032916523551719e-07, |
| "loss": 0.5021499395370483, |
| "step": 143 |
| }, |
| { |
| "epoch": 3.5201238390092877, |
| "grad_norm": 0.46898508071899414, |
| "learning_rate": 7.903670963314535e-07, |
| "loss": 0.5173486471176147, |
| "step": 144 |
| }, |
| { |
| "epoch": 3.544891640866873, |
| "grad_norm": 0.5036367177963257, |
| "learning_rate": 7.774790660436857e-07, |
| "loss": 0.5127341151237488, |
| "step": 145 |
| }, |
| { |
| "epoch": 3.569659442724458, |
| "grad_norm": 0.4592057466506958, |
| "learning_rate": 7.646298070591577e-07, |
| "loss": 0.5291725397109985, |
| "step": 146 |
| }, |
| { |
| "epoch": 3.594427244582043, |
| "grad_norm": 0.579252302646637, |
| "learning_rate": 7.518215581897763e-07, |
| "loss": 0.5540162324905396, |
| "step": 147 |
| }, |
| { |
| "epoch": 3.6191950464396285, |
| "grad_norm": 0.5662134885787964, |
| "learning_rate": 7.390565511019833e-07, |
| "loss": 0.5307095646858215, |
| "step": 148 |
| }, |
| { |
| "epoch": 3.6439628482972135, |
| "grad_norm": 0.5780702233314514, |
| "learning_rate": 7.263370099279171e-07, |
| "loss": 0.48574694991111755, |
| "step": 149 |
| }, |
| { |
| "epoch": 3.6687306501547985, |
| "grad_norm": 0.5063837766647339, |
| "learning_rate": 7.136651508778874e-07, |
| "loss": 0.5621860027313232, |
| "step": 150 |
| }, |
| { |
| "epoch": 3.6687306501547985, |
| "eval_accuracy": 0.8215492383391412, |
| "eval_loss": 0.5617780685424805, |
| "eval_runtime": 16.4087, |
| "eval_samples_per_second": 4.144, |
| "eval_steps_per_second": 2.072, |
| "step": 150 |
| }, |
| { |
| "epoch": 3.693498452012384, |
| "grad_norm": 0.5430096387863159, |
| "learning_rate": 7.010431818542297e-07, |
| "loss": 0.4991950988769531, |
| "step": 151 |
| }, |
| { |
| "epoch": 3.718266253869969, |
| "grad_norm": 0.4858173727989197, |
| "learning_rate": 6.884733020666084e-07, |
| "loss": 0.47163355350494385, |
| "step": 152 |
| }, |
| { |
| "epoch": 3.7430340557275543, |
| "grad_norm": 0.4979320168495178, |
| "learning_rate": 6.759577016488343e-07, |
| "loss": 0.5382797718048096, |
| "step": 153 |
| }, |
| { |
| "epoch": 3.7678018575851393, |
| "grad_norm": 0.47822287678718567, |
| "learning_rate": 6.63498561277261e-07, |
| "loss": 0.5248020887374878, |
| "step": 154 |
| }, |
| { |
| "epoch": 3.7925696594427247, |
| "grad_norm": 0.5561540722846985, |
| "learning_rate": 6.510980517908333e-07, |
| "loss": 0.47944825887680054, |
| "step": 155 |
| }, |
| { |
| "epoch": 3.8173374613003097, |
| "grad_norm": 0.510204553604126, |
| "learning_rate": 6.387583338128471e-07, |
| "loss": 0.5094054937362671, |
| "step": 156 |
| }, |
| { |
| "epoch": 3.8421052631578947, |
| "grad_norm": 0.4817684590816498, |
| "learning_rate": 6.264815573744884e-07, |
| "loss": 0.4909018874168396, |
| "step": 157 |
| }, |
| { |
| "epoch": 3.86687306501548, |
| "grad_norm": 0.4790090024471283, |
| "learning_rate": 6.142698615402204e-07, |
| "loss": 0.47690001130104065, |
| "step": 158 |
| }, |
| { |
| "epoch": 3.891640866873065, |
| "grad_norm": 0.4971541464328766, |
| "learning_rate": 6.021253740350792e-07, |
| "loss": 0.5042445659637451, |
| "step": 159 |
| }, |
| { |
| "epoch": 3.91640866873065, |
| "grad_norm": 0.5663966536521912, |
| "learning_rate": 5.900502108739465e-07, |
| "loss": 0.5802559852600098, |
| "step": 160 |
| }, |
| { |
| "epoch": 3.9411764705882355, |
| "grad_norm": 0.6140542030334473, |
| "learning_rate": 5.780464759928623e-07, |
| "loss": 0.5226213932037354, |
| "step": 161 |
| }, |
| { |
| "epoch": 3.9659442724458205, |
| "grad_norm": 0.510217010974884, |
| "learning_rate": 5.661162608824419e-07, |
| "loss": 0.487061470746994, |
| "step": 162 |
| }, |
| { |
| "epoch": 3.9907120743034055, |
| "grad_norm": 0.47863468527793884, |
| "learning_rate": 5.542616442234618e-07, |
| "loss": 0.49519461393356323, |
| "step": 163 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.8134075999259949, |
| "learning_rate": 5.424846915246769e-07, |
| "loss": 0.5006481409072876, |
| "step": 164 |
| }, |
| { |
| "epoch": 4.024767801857585, |
| "grad_norm": 0.5010446906089783, |
| "learning_rate": 5.307874547629339e-07, |
| "loss": 0.5043383240699768, |
| "step": 165 |
| }, |
| { |
| "epoch": 4.04953560371517, |
| "grad_norm": 0.5629169344902039, |
| "learning_rate": 5.191719720256407e-07, |
| "loss": 0.5104990005493164, |
| "step": 166 |
| }, |
| { |
| "epoch": 4.074303405572755, |
| "grad_norm": 0.5630432367324829, |
| "learning_rate": 5.076402671556577e-07, |
| "loss": 0.4841610789299011, |
| "step": 167 |
| }, |
| { |
| "epoch": 4.099071207430341, |
| "grad_norm": 0.46193253993988037, |
| "learning_rate": 4.961943493986708e-07, |
| "loss": 0.5317561030387878, |
| "step": 168 |
| }, |
| { |
| "epoch": 4.123839009287925, |
| "grad_norm": 0.5281070470809937, |
| "learning_rate": 4.848362130531039e-07, |
| "loss": 0.5141686201095581, |
| "step": 169 |
| }, |
| { |
| "epoch": 4.148606811145511, |
| "grad_norm": 0.927697479724884, |
| "learning_rate": 4.7356783712264403e-07, |
| "loss": 0.46369314193725586, |
| "step": 170 |
| }, |
| { |
| "epoch": 4.173374613003096, |
| "grad_norm": 0.5692654252052307, |
| "learning_rate": 4.623911849714225e-07, |
| "loss": 0.48228251934051514, |
| "step": 171 |
| }, |
| { |
| "epoch": 4.198142414860681, |
| "grad_norm": 0.48862549662590027, |
| "learning_rate": 4.5130820398192636e-07, |
| "loss": 0.5285767316818237, |
| "step": 172 |
| }, |
| { |
| "epoch": 4.222910216718266, |
| "grad_norm": 0.5772708058357239, |
| "learning_rate": 4.40320825215692e-07, |
| "loss": 0.5200311541557312, |
| "step": 173 |
| }, |
| { |
| "epoch": 4.247678018575852, |
| "grad_norm": 0.5576812028884888, |
| "learning_rate": 4.294309630768451e-07, |
| "loss": 0.5052947402000427, |
| "step": 174 |
| }, |
| { |
| "epoch": 4.272445820433436, |
| "grad_norm": 0.48456260561943054, |
| "learning_rate": 4.1864051497854027e-07, |
| "loss": 0.5091853141784668, |
| "step": 175 |
| }, |
| { |
| "epoch": 4.2972136222910216, |
| "grad_norm": 0.4992901086807251, |
| "learning_rate": 4.079513610123618e-07, |
| "loss": 0.5285595655441284, |
| "step": 176 |
| }, |
| { |
| "epoch": 4.321981424148607, |
| "grad_norm": 0.560563862323761, |
| "learning_rate": 3.973653636207437e-07, |
| "loss": 0.5327163338661194, |
| "step": 177 |
| }, |
| { |
| "epoch": 4.346749226006192, |
| "grad_norm": 0.48380428552627563, |
| "learning_rate": 3.8688436727246296e-07, |
| "loss": 0.4750836491584778, |
| "step": 178 |
| }, |
| { |
| "epoch": 4.371517027863777, |
| "grad_norm": 0.4964829385280609, |
| "learning_rate": 3.765101981412665e-07, |
| "loss": 0.46454548835754395, |
| "step": 179 |
| }, |
| { |
| "epoch": 4.396284829721362, |
| "grad_norm": 0.4538560211658478, |
| "learning_rate": 3.6624466378768384e-07, |
| "loss": 0.51465904712677, |
| "step": 180 |
| }, |
| { |
| "epoch": 4.421052631578947, |
| "grad_norm": 0.6692084074020386, |
| "learning_rate": 3.560895528440844e-07, |
| "loss": 0.4617176055908203, |
| "step": 181 |
| }, |
| { |
| "epoch": 4.445820433436532, |
| "grad_norm": 0.47236230969429016, |
| "learning_rate": 3.4604663470303186e-07, |
| "loss": 0.5083804130554199, |
| "step": 182 |
| }, |
| { |
| "epoch": 4.470588235294118, |
| "grad_norm": 0.4774688184261322, |
| "learning_rate": 3.3611765920899183e-07, |
| "loss": 0.5058382749557495, |
| "step": 183 |
| }, |
| { |
| "epoch": 4.495356037151703, |
| "grad_norm": 0.47210627794265747, |
| "learning_rate": 3.263043563534428e-07, |
| "loss": 0.5376588106155396, |
| "step": 184 |
| }, |
| { |
| "epoch": 4.520123839009288, |
| "grad_norm": 0.4772137403488159, |
| "learning_rate": 3.166084359734513e-07, |
| "loss": 0.5304179191589355, |
| "step": 185 |
| }, |
| { |
| "epoch": 4.544891640866873, |
| "grad_norm": 0.4682233929634094, |
| "learning_rate": 3.070315874537531e-07, |
| "loss": 0.4820975661277771, |
| "step": 186 |
| }, |
| { |
| "epoch": 4.569659442724459, |
| "grad_norm": 0.48219650983810425, |
| "learning_rate": 2.975754794324015e-07, |
| "loss": 0.5084782838821411, |
| "step": 187 |
| }, |
| { |
| "epoch": 4.594427244582043, |
| "grad_norm": 0.43362459540367126, |
| "learning_rate": 2.8824175951002916e-07, |
| "loss": 0.47581952810287476, |
| "step": 188 |
| }, |
| { |
| "epoch": 4.6191950464396285, |
| "grad_norm": 0.567948579788208, |
| "learning_rate": 2.790320539627754e-07, |
| "loss": 0.5314459800720215, |
| "step": 189 |
| }, |
| { |
| "epoch": 4.643962848297214, |
| "grad_norm": 0.5087016224861145, |
| "learning_rate": 2.6994796745893e-07, |
| "loss": 0.4740360379219055, |
| "step": 190 |
| }, |
| { |
| "epoch": 4.6687306501547985, |
| "grad_norm": 0.5123845338821411, |
| "learning_rate": 2.60991082779341e-07, |
| "loss": 0.5245854258537292, |
| "step": 191 |
| }, |
| { |
| "epoch": 4.693498452012384, |
| "grad_norm": 0.4884699285030365, |
| "learning_rate": 2.521629605416354e-07, |
| "loss": 0.5254173278808594, |
| "step": 192 |
| }, |
| { |
| "epoch": 4.718266253869969, |
| "grad_norm": 0.5492839217185974, |
| "learning_rate": 2.434651389283042e-07, |
| "loss": 0.5060293674468994, |
| "step": 193 |
| }, |
| { |
| "epoch": 4.743034055727554, |
| "grad_norm": 0.4537581503391266, |
| "learning_rate": 2.3489913341869193e-07, |
| "loss": 0.5028636455535889, |
| "step": 194 |
| }, |
| { |
| "epoch": 4.767801857585139, |
| "grad_norm": 0.5206896662712097, |
| "learning_rate": 2.264664365249469e-07, |
| "loss": 0.509818971157074, |
| "step": 195 |
| }, |
| { |
| "epoch": 4.792569659442725, |
| "grad_norm": 0.5348969101905823, |
| "learning_rate": 2.181685175319702e-07, |
| "loss": 0.4900963306427002, |
| "step": 196 |
| }, |
| { |
| "epoch": 4.817337461300309, |
| "grad_norm": 0.478466659784317, |
| "learning_rate": 2.100068222414121e-07, |
| "loss": 0.5366532802581787, |
| "step": 197 |
| }, |
| { |
| "epoch": 4.842105263157895, |
| "grad_norm": 0.4873082637786865, |
| "learning_rate": 2.0198277271976049e-07, |
| "loss": 0.5138839483261108, |
| "step": 198 |
| }, |
| { |
| "epoch": 4.86687306501548, |
| "grad_norm": 0.5307355523109436, |
| "learning_rate": 1.9409776705056514e-07, |
| "loss": 0.48487958312034607, |
| "step": 199 |
| }, |
| { |
| "epoch": 4.891640866873065, |
| "grad_norm": 0.6182578206062317, |
| "learning_rate": 1.863531790908398e-07, |
| "loss": 0.49715912342071533, |
| "step": 200 |
| }, |
| { |
| "epoch": 4.891640866873065, |
| "eval_accuracy": 0.8215848485329422, |
| "eval_loss": 0.5621271133422852, |
| "eval_runtime": 16.3624, |
| "eval_samples_per_second": 4.156, |
| "eval_steps_per_second": 2.078, |
| "step": 200 |
| }, |
| { |
| "epoch": 4.91640866873065, |
| "grad_norm": 0.5110271573066711, |
| "learning_rate": 1.787503582316864e-07, |
| "loss": 0.5255718231201172, |
| "step": 201 |
| }, |
| { |
| "epoch": 4.9411764705882355, |
| "grad_norm": 0.4957195222377777, |
| "learning_rate": 1.7129062916318137e-07, |
| "loss": 0.5106043219566345, |
| "step": 202 |
| }, |
| { |
| "epoch": 4.965944272445821, |
| "grad_norm": 1.4632741212844849, |
| "learning_rate": 1.6397529164356606e-07, |
| "loss": 0.5344016551971436, |
| "step": 203 |
| }, |
| { |
| "epoch": 4.9907120743034055, |
| "grad_norm": 0.533440113067627, |
| "learning_rate": 1.5680562027278154e-07, |
| "loss": 0.5215489268302917, |
| "step": 204 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.8572560548782349, |
| "learning_rate": 1.49782864270386e-07, |
| "loss": 0.5227999687194824, |
| "step": 205 |
| }, |
| { |
| "epoch": 5.024767801857585, |
| "grad_norm": 0.43222400546073914, |
| "learning_rate": 1.429082472578954e-07, |
| "loss": 0.5099145174026489, |
| "step": 206 |
| }, |
| { |
| "epoch": 5.04953560371517, |
| "grad_norm": 0.47421810030937195, |
| "learning_rate": 1.3618296704558364e-07, |
| "loss": 0.5271211862564087, |
| "step": 207 |
| }, |
| { |
| "epoch": 5.074303405572755, |
| "grad_norm": 0.5383461117744446, |
| "learning_rate": 1.2960819542378053e-07, |
| "loss": 0.548247218132019, |
| "step": 208 |
| }, |
| { |
| "epoch": 5.099071207430341, |
| "grad_norm": 0.513953685760498, |
| "learning_rate": 1.2318507795870137e-07, |
| "loss": 0.47977253794670105, |
| "step": 209 |
| }, |
| { |
| "epoch": 5.123839009287925, |
| "grad_norm": 0.5112437605857849, |
| "learning_rate": 1.1691473379284944e-07, |
| "loss": 0.4924686551094055, |
| "step": 210 |
| }, |
| { |
| "epoch": 5.148606811145511, |
| "grad_norm": 0.5439184308052063, |
| "learning_rate": 1.1079825545001886e-07, |
| "loss": 0.4926351308822632, |
| "step": 211 |
| }, |
| { |
| "epoch": 5.173374613003096, |
| "grad_norm": 0.47784221172332764, |
| "learning_rate": 1.0483670864493777e-07, |
| "loss": 0.5255255699157715, |
| "step": 212 |
| }, |
| { |
| "epoch": 5.198142414860681, |
| "grad_norm": 0.48372480273246765, |
| "learning_rate": 9.903113209758096e-08, |
| "loss": 0.5388856530189514, |
| "step": 213 |
| }, |
| { |
| "epoch": 5.222910216718266, |
| "grad_norm": 0.4922617971897125, |
| "learning_rate": 9.338253735218748e-08, |
| "loss": 0.4714866280555725, |
| "step": 214 |
| }, |
| { |
| "epoch": 5.247678018575852, |
| "grad_norm": 0.5694555044174194, |
| "learning_rate": 8.789190860101226e-08, |
| "loss": 0.49757862091064453, |
| "step": 215 |
| }, |
| { |
| "epoch": 5.272445820433436, |
| "grad_norm": 0.5285799503326416, |
| "learning_rate": 8.256020251284379e-08, |
| "loss": 0.5523006916046143, |
| "step": 216 |
| }, |
| { |
| "epoch": 5.2972136222910216, |
| "grad_norm": 0.542019784450531, |
| "learning_rate": 7.73883480663171e-08, |
| "loss": 0.4939878582954407, |
| "step": 217 |
| }, |
| { |
| "epoch": 5.321981424148607, |
| "grad_norm": 0.4783063232898712, |
| "learning_rate": 7.23772463880522e-08, |
| "loss": 0.5162045359611511, |
| "step": 218 |
| }, |
| { |
| "epoch": 5.346749226006192, |
| "grad_norm": 0.4960096776485443, |
| "learning_rate": 6.75277705956443e-08, |
| "loss": 0.5186662673950195, |
| "step": 219 |
| }, |
| { |
| "epoch": 5.371517027863777, |
| "grad_norm": 0.4951794147491455, |
| "learning_rate": 6.284076564553464e-08, |
| "loss": 0.48755860328674316, |
| "step": 220 |
| }, |
| { |
| "epoch": 5.396284829721362, |
| "grad_norm": 0.4898841381072998, |
| "learning_rate": 5.831704818578842e-08, |
| "loss": 0.5034775733947754, |
| "step": 221 |
| }, |
| { |
| "epoch": 5.421052631578947, |
| "grad_norm": 0.540875256061554, |
| "learning_rate": 5.395740641380531e-08, |
| "loss": 0.4632171094417572, |
| "step": 222 |
| }, |
| { |
| "epoch": 5.445820433436532, |
| "grad_norm": 0.45750898122787476, |
| "learning_rate": 4.976259993898502e-08, |
| "loss": 0.49796921014785767, |
| "step": 223 |
| }, |
| { |
| "epoch": 5.470588235294118, |
| "grad_norm": 0.5052651166915894, |
| "learning_rate": 4.573335965037706e-08, |
| "loss": 0.47650158405303955, |
| "step": 224 |
| }, |
| { |
| "epoch": 5.495356037151703, |
| "grad_norm": 0.4999431371688843, |
| "learning_rate": 4.187038758933203e-08, |
| "loss": 0.49834519624710083, |
| "step": 225 |
| }, |
| { |
| "epoch": 5.520123839009288, |
| "grad_norm": 0.5175738334655762, |
| "learning_rate": 3.817435682718095e-08, |
| "loss": 0.46955606341362, |
| "step": 226 |
| }, |
| { |
| "epoch": 5.544891640866873, |
| "grad_norm": 0.4690812826156616, |
| "learning_rate": 3.464591134796135e-08, |
| "loss": 0.5154824256896973, |
| "step": 227 |
| }, |
| { |
| "epoch": 5.569659442724459, |
| "grad_norm": 0.4758513867855072, |
| "learning_rate": 3.1285665936211516e-08, |
| "loss": 0.5336707830429077, |
| "step": 228 |
| }, |
| { |
| "epoch": 5.594427244582043, |
| "grad_norm": 0.442473441362381, |
| "learning_rate": 2.8094206069852355e-08, |
| "loss": 0.4967498779296875, |
| "step": 229 |
| }, |
| { |
| "epoch": 5.6191950464396285, |
| "grad_norm": 0.4868296682834625, |
| "learning_rate": 2.507208781817638e-08, |
| "loss": 0.5311983823776245, |
| "step": 230 |
| }, |
| { |
| "epoch": 5.643962848297214, |
| "grad_norm": 0.5476986169815063, |
| "learning_rate": 2.221983774495928e-08, |
| "loss": 0.5054424405097961, |
| "step": 231 |
| }, |
| { |
| "epoch": 5.6687306501547985, |
| "grad_norm": 0.4974565804004669, |
| "learning_rate": 1.953795281671333e-08, |
| "loss": 0.5006812214851379, |
| "step": 232 |
| }, |
| { |
| "epoch": 5.693498452012384, |
| "grad_norm": 0.5025091767311096, |
| "learning_rate": 1.7026900316098212e-08, |
| "loss": 0.527012825012207, |
| "step": 233 |
| }, |
| { |
| "epoch": 5.718266253869969, |
| "grad_norm": 0.46924424171447754, |
| "learning_rate": 1.4687117760502576e-08, |
| "loss": 0.4735889434814453, |
| "step": 234 |
| }, |
| { |
| "epoch": 5.743034055727554, |
| "grad_norm": 0.454560786485672, |
| "learning_rate": 1.2519012825812803e-08, |
| "loss": 0.49276185035705566, |
| "step": 235 |
| }, |
| { |
| "epoch": 5.767801857585139, |
| "grad_norm": 0.4710627496242523, |
| "learning_rate": 1.0522963275380492e-08, |
| "loss": 0.5048189759254456, |
| "step": 236 |
| }, |
| { |
| "epoch": 5.792569659442725, |
| "grad_norm": 0.4550038278102875, |
| "learning_rate": 8.699316894203223e-09, |
| "loss": 0.513171911239624, |
| "step": 237 |
| }, |
| { |
| "epoch": 5.817337461300309, |
| "grad_norm": 0.5602344870567322, |
| "learning_rate": 7.048391428326584e-09, |
| "loss": 0.5195218324661255, |
| "step": 238 |
| }, |
| { |
| "epoch": 5.842105263157895, |
| "grad_norm": 0.4764668643474579, |
| "learning_rate": 5.570474529481561e-09, |
| "loss": 0.49439120292663574, |
| "step": 239 |
| }, |
| { |
| "epoch": 5.86687306501548, |
| "grad_norm": 0.7008131146430969, |
| "learning_rate": 4.265823704965532e-09, |
| "loss": 0.5026534795761108, |
| "step": 240 |
| }, |
| { |
| "epoch": 5.891640866873065, |
| "grad_norm": 0.5155523419380188, |
| "learning_rate": 3.1346662727740338e-09, |
| "loss": 0.505569338798523, |
| "step": 241 |
| }, |
| { |
| "epoch": 5.91640866873065, |
| "grad_norm": 0.48813626170158386, |
| "learning_rate": 2.1771993219946718e-09, |
| "loss": 0.4332225024700165, |
| "step": 242 |
| }, |
| { |
| "epoch": 5.9411764705882355, |
| "grad_norm": 0.5733649134635925, |
| "learning_rate": 1.393589678466367e-09, |
| "loss": 0.5184577703475952, |
| "step": 243 |
| }, |
| { |
| "epoch": 5.965944272445821, |
| "grad_norm": 0.47005656361579895, |
| "learning_rate": 7.839738757123848e-10, |
| "loss": 0.48927992582321167, |
| "step": 244 |
| }, |
| { |
| "epoch": 5.9907120743034055, |
| "grad_norm": 0.519534170627594, |
| "learning_rate": 3.484581311511414e-10, |
| "loss": 0.5252695679664612, |
| "step": 245 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.8245697617530823, |
| "learning_rate": 8.711832758934168e-11, |
| "loss": 0.485756516456604, |
| "step": 246 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 246, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 63272699183104.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|