| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.98793242156074, | |
| "eval_steps": 500, | |
| "global_step": 775, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.006436041834271922, | |
| "grad_norm": 857.0773315429688, | |
| "learning_rate": 5.000000000000001e-07, | |
| "loss": 12.2736, | |
| "num_input_tokens_seen": 6576, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.012872083668543845, | |
| "grad_norm": 899.0701293945312, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 12.5518, | |
| "num_input_tokens_seen": 13312, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.019308125502815767, | |
| "grad_norm": 833.7578125, | |
| "learning_rate": 1.5e-06, | |
| "loss": 11.8551, | |
| "num_input_tokens_seen": 19952, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.02574416733708769, | |
| "grad_norm": 712.9902954101562, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 10.9045, | |
| "num_input_tokens_seen": 26640, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.032180209171359615, | |
| "grad_norm": 620.4878540039062, | |
| "learning_rate": 2.5e-06, | |
| "loss": 8.9845, | |
| "num_input_tokens_seen": 33360, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.038616251005631534, | |
| "grad_norm": 376.7406921386719, | |
| "learning_rate": 3e-06, | |
| "loss": 6.35, | |
| "num_input_tokens_seen": 39984, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.04505229283990346, | |
| "grad_norm": 356.7503967285156, | |
| "learning_rate": 3.5e-06, | |
| "loss": 5.4864, | |
| "num_input_tokens_seen": 46496, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.05148833467417538, | |
| "grad_norm": 383.4678955078125, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 2.8013, | |
| "num_input_tokens_seen": 53008, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.057924376508447305, | |
| "grad_norm": 285.64483642578125, | |
| "learning_rate": 4.5e-06, | |
| "loss": 0.9851, | |
| "num_input_tokens_seen": 59856, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.06436041834271923, | |
| "grad_norm": 222.22850036621094, | |
| "learning_rate": 5e-06, | |
| "loss": 0.5088, | |
| "num_input_tokens_seen": 66352, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.07079646017699115, | |
| "grad_norm": 338.3565368652344, | |
| "learning_rate": 4.99997891923933e-06, | |
| "loss": 2.022, | |
| "num_input_tokens_seen": 73184, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.07723250201126307, | |
| "grad_norm": 97.87013244628906, | |
| "learning_rate": 4.999915677312839e-06, | |
| "loss": 0.5842, | |
| "num_input_tokens_seen": 80064, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.083668543845535, | |
| "grad_norm": 137.05706787109375, | |
| "learning_rate": 4.999810275287077e-06, | |
| "loss": 0.8538, | |
| "num_input_tokens_seen": 86688, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.09010458567980692, | |
| "grad_norm": 88.00048828125, | |
| "learning_rate": 4.9996627149396075e-06, | |
| "loss": 0.5843, | |
| "num_input_tokens_seen": 93168, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.09654062751407884, | |
| "grad_norm": 61.654090881347656, | |
| "learning_rate": 4.999472998758979e-06, | |
| "loss": 0.4456, | |
| "num_input_tokens_seen": 99696, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.10297666934835076, | |
| "grad_norm": 37.3619499206543, | |
| "learning_rate": 4.99924112994468e-06, | |
| "loss": 0.3542, | |
| "num_input_tokens_seen": 106016, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.10941271118262269, | |
| "grad_norm": 41.219093322753906, | |
| "learning_rate": 4.998967112407087e-06, | |
| "loss": 0.3416, | |
| "num_input_tokens_seen": 112672, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.11584875301689461, | |
| "grad_norm": 22.267297744750977, | |
| "learning_rate": 4.9986509507673986e-06, | |
| "loss": 0.2803, | |
| "num_input_tokens_seen": 119312, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.12228479485116653, | |
| "grad_norm": 22.40268898010254, | |
| "learning_rate": 4.998292650357558e-06, | |
| "loss": 0.2603, | |
| "num_input_tokens_seen": 126016, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.12872083668543846, | |
| "grad_norm": 23.54829978942871, | |
| "learning_rate": 4.99789221722016e-06, | |
| "loss": 0.2456, | |
| "num_input_tokens_seen": 132704, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.13515687851971037, | |
| "grad_norm": 24.01500701904297, | |
| "learning_rate": 4.997449658108354e-06, | |
| "loss": 0.2269, | |
| "num_input_tokens_seen": 139472, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.1415929203539823, | |
| "grad_norm": 26.877809524536133, | |
| "learning_rate": 4.996964980485725e-06, | |
| "loss": 0.2896, | |
| "num_input_tokens_seen": 145968, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.14802896218825423, | |
| "grad_norm": 11.89682388305664, | |
| "learning_rate": 4.996438192526173e-06, | |
| "loss": 0.1415, | |
| "num_input_tokens_seen": 152144, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.15446500402252614, | |
| "grad_norm": 12.671065330505371, | |
| "learning_rate": 4.995869303113768e-06, | |
| "loss": 0.2228, | |
| "num_input_tokens_seen": 158432, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.16090104585679807, | |
| "grad_norm": 12.23610782623291, | |
| "learning_rate": 4.995258321842611e-06, | |
| "loss": 0.1537, | |
| "num_input_tokens_seen": 164672, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.16733708769107, | |
| "grad_norm": 21.917552947998047, | |
| "learning_rate": 4.994605259016658e-06, | |
| "loss": 0.2146, | |
| "num_input_tokens_seen": 170896, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.1737731295253419, | |
| "grad_norm": 6.669096946716309, | |
| "learning_rate": 4.993910125649561e-06, | |
| "loss": 0.1688, | |
| "num_input_tokens_seen": 177904, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.18020917135961384, | |
| "grad_norm": 10.865259170532227, | |
| "learning_rate": 4.99317293346447e-06, | |
| "loss": 0.1592, | |
| "num_input_tokens_seen": 184640, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.18664521319388577, | |
| "grad_norm": 22.876623153686523, | |
| "learning_rate": 4.992393694893844e-06, | |
| "loss": 0.2333, | |
| "num_input_tokens_seen": 191008, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.19308125502815768, | |
| "grad_norm": 12.427145004272461, | |
| "learning_rate": 4.991572423079236e-06, | |
| "loss": 0.1812, | |
| "num_input_tokens_seen": 197568, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.1995172968624296, | |
| "grad_norm": 15.451845169067383, | |
| "learning_rate": 4.990709131871074e-06, | |
| "loss": 0.1925, | |
| "num_input_tokens_seen": 204272, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.20595333869670152, | |
| "grad_norm": 18.907636642456055, | |
| "learning_rate": 4.989803835828426e-06, | |
| "loss": 0.1864, | |
| "num_input_tokens_seen": 210944, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.21238938053097345, | |
| "grad_norm": 15.60383129119873, | |
| "learning_rate": 4.988856550218755e-06, | |
| "loss": 0.1848, | |
| "num_input_tokens_seen": 217584, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.21882542236524538, | |
| "grad_norm": 7.129302024841309, | |
| "learning_rate": 4.987867291017662e-06, | |
| "loss": 0.1466, | |
| "num_input_tokens_seen": 224064, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.2252614641995173, | |
| "grad_norm": 15.756115913391113, | |
| "learning_rate": 4.986836074908616e-06, | |
| "loss": 0.1481, | |
| "num_input_tokens_seen": 230880, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.23169750603378922, | |
| "grad_norm": 8.845354080200195, | |
| "learning_rate": 4.985762919282674e-06, | |
| "loss": 0.149, | |
| "num_input_tokens_seen": 237312, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.23813354786806115, | |
| "grad_norm": 15.7093505859375, | |
| "learning_rate": 4.984647842238185e-06, | |
| "loss": 0.1506, | |
| "num_input_tokens_seen": 243648, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.24456958970233306, | |
| "grad_norm": 11.331380844116211, | |
| "learning_rate": 4.983490862580486e-06, | |
| "loss": 0.1709, | |
| "num_input_tokens_seen": 250096, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.251005631536605, | |
| "grad_norm": 6.254825115203857, | |
| "learning_rate": 4.982291999821587e-06, | |
| "loss": 0.0898, | |
| "num_input_tokens_seen": 256432, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.2574416733708769, | |
| "grad_norm": 7.792216777801514, | |
| "learning_rate": 4.98105127417984e-06, | |
| "loss": 0.1856, | |
| "num_input_tokens_seen": 263088, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.26387771520514886, | |
| "grad_norm": 4.186593055725098, | |
| "learning_rate": 4.979768706579595e-06, | |
| "loss": 0.0947, | |
| "num_input_tokens_seen": 269904, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.27031375703942073, | |
| "grad_norm": 7.599153518676758, | |
| "learning_rate": 4.978444318650855e-06, | |
| "loss": 0.1367, | |
| "num_input_tokens_seen": 276592, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.27674979887369267, | |
| "grad_norm": 10.765763282775879, | |
| "learning_rate": 4.977078132728901e-06, | |
| "loss": 0.1645, | |
| "num_input_tokens_seen": 283424, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.2831858407079646, | |
| "grad_norm": 6.705766677856445, | |
| "learning_rate": 4.975670171853926e-06, | |
| "loss": 0.1179, | |
| "num_input_tokens_seen": 290176, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.28962188254223653, | |
| "grad_norm": 6.055794715881348, | |
| "learning_rate": 4.9742204597706386e-06, | |
| "loss": 0.133, | |
| "num_input_tokens_seen": 296752, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.29605792437650846, | |
| "grad_norm": 7.4584760665893555, | |
| "learning_rate": 4.972729020927866e-06, | |
| "loss": 0.083, | |
| "num_input_tokens_seen": 303392, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.3024939662107804, | |
| "grad_norm": 10.979104995727539, | |
| "learning_rate": 4.9711958804781385e-06, | |
| "loss": 0.1748, | |
| "num_input_tokens_seen": 310304, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.3089300080450523, | |
| "grad_norm": 13.912871360778809, | |
| "learning_rate": 4.969621064277271e-06, | |
| "loss": 0.1854, | |
| "num_input_tokens_seen": 317440, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.3153660498793242, | |
| "grad_norm": 6.554210186004639, | |
| "learning_rate": 4.968004598883923e-06, | |
| "loss": 0.1232, | |
| "num_input_tokens_seen": 324304, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.32180209171359614, | |
| "grad_norm": 6.358190536499023, | |
| "learning_rate": 4.966346511559149e-06, | |
| "loss": 0.1172, | |
| "num_input_tokens_seen": 330832, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.32823813354786807, | |
| "grad_norm": 6.195626258850098, | |
| "learning_rate": 4.964646830265944e-06, | |
| "loss": 0.1404, | |
| "num_input_tokens_seen": 337952, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.33467417538214, | |
| "grad_norm": 12.585171699523926, | |
| "learning_rate": 4.962905583668766e-06, | |
| "loss": 0.137, | |
| "num_input_tokens_seen": 344384, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.3411102172164119, | |
| "grad_norm": 3.7672178745269775, | |
| "learning_rate": 4.961122801133059e-06, | |
| "loss": 0.1191, | |
| "num_input_tokens_seen": 351184, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.3475462590506838, | |
| "grad_norm": 17.48076629638672, | |
| "learning_rate": 4.9592985127247525e-06, | |
| "loss": 0.1624, | |
| "num_input_tokens_seen": 357696, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.35398230088495575, | |
| "grad_norm": 7.758498668670654, | |
| "learning_rate": 4.957432749209755e-06, | |
| "loss": 0.1256, | |
| "num_input_tokens_seen": 364368, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.3604183427192277, | |
| "grad_norm": 10.048332214355469, | |
| "learning_rate": 4.955525542053438e-06, | |
| "loss": 0.1274, | |
| "num_input_tokens_seen": 370896, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.3668543845534996, | |
| "grad_norm": 17.495296478271484, | |
| "learning_rate": 4.953576923420105e-06, | |
| "loss": 0.174, | |
| "num_input_tokens_seen": 377168, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.37329042638777155, | |
| "grad_norm": 7.546329021453857, | |
| "learning_rate": 4.9515869261724444e-06, | |
| "loss": 0.0805, | |
| "num_input_tokens_seen": 383728, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.3797264682220434, | |
| "grad_norm": 5.6687188148498535, | |
| "learning_rate": 4.949555583870983e-06, | |
| "loss": 0.1181, | |
| "num_input_tokens_seen": 390448, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.38616251005631536, | |
| "grad_norm": 9.777739524841309, | |
| "learning_rate": 4.9474829307735115e-06, | |
| "loss": 0.1613, | |
| "num_input_tokens_seen": 396960, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.3925985518905873, | |
| "grad_norm": 10.064454078674316, | |
| "learning_rate": 4.9453690018345144e-06, | |
| "loss": 0.0885, | |
| "num_input_tokens_seen": 403680, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.3990345937248592, | |
| "grad_norm": 6.493910312652588, | |
| "learning_rate": 4.943213832704575e-06, | |
| "loss": 0.1526, | |
| "num_input_tokens_seen": 410096, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.40547063555913115, | |
| "grad_norm": 7.949091911315918, | |
| "learning_rate": 4.941017459729778e-06, | |
| "loss": 0.1114, | |
| "num_input_tokens_seen": 416672, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.41190667739340303, | |
| "grad_norm": 8.829463958740234, | |
| "learning_rate": 4.938779919951092e-06, | |
| "loss": 0.1139, | |
| "num_input_tokens_seen": 423136, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.41834271922767496, | |
| "grad_norm": 3.495246410369873, | |
| "learning_rate": 4.936501251103751e-06, | |
| "loss": 0.0878, | |
| "num_input_tokens_seen": 429888, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.4247787610619469, | |
| "grad_norm": 8.937992095947266, | |
| "learning_rate": 4.934181491616613e-06, | |
| "loss": 0.1047, | |
| "num_input_tokens_seen": 436720, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.43121480289621883, | |
| "grad_norm": 12.225470542907715, | |
| "learning_rate": 4.9318206806115125e-06, | |
| "loss": 0.1323, | |
| "num_input_tokens_seen": 443648, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.43765084473049076, | |
| "grad_norm": 8.192527770996094, | |
| "learning_rate": 4.929418857902603e-06, | |
| "loss": 0.095, | |
| "num_input_tokens_seen": 450464, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.4440868865647627, | |
| "grad_norm": 9.30573844909668, | |
| "learning_rate": 4.926976063995687e-06, | |
| "loss": 0.2024, | |
| "num_input_tokens_seen": 457296, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.4505229283990346, | |
| "grad_norm": 16.341676712036133, | |
| "learning_rate": 4.9244923400875245e-06, | |
| "loss": 0.1614, | |
| "num_input_tokens_seen": 463984, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.4569589702333065, | |
| "grad_norm": 19.184734344482422, | |
| "learning_rate": 4.921967728065147e-06, | |
| "loss": 0.2073, | |
| "num_input_tokens_seen": 470432, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.46339501206757844, | |
| "grad_norm": 9.802066802978516, | |
| "learning_rate": 4.91940227050515e-06, | |
| "loss": 0.108, | |
| "num_input_tokens_seen": 476736, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.46983105390185037, | |
| "grad_norm": 8.744816780090332, | |
| "learning_rate": 4.916796010672969e-06, | |
| "loss": 0.1455, | |
| "num_input_tokens_seen": 482896, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.4762670957361223, | |
| "grad_norm": 11.598526954650879, | |
| "learning_rate": 4.914148992522157e-06, | |
| "loss": 0.131, | |
| "num_input_tokens_seen": 489504, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.4827031375703942, | |
| "grad_norm": 11.42316722869873, | |
| "learning_rate": 4.911461260693639e-06, | |
| "loss": 0.1416, | |
| "num_input_tokens_seen": 496160, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.4891391794046661, | |
| "grad_norm": 3.1568145751953125, | |
| "learning_rate": 4.908732860514958e-06, | |
| "loss": 0.1045, | |
| "num_input_tokens_seen": 502528, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.49557522123893805, | |
| "grad_norm": 9.457361221313477, | |
| "learning_rate": 4.905963837999518e-06, | |
| "loss": 0.1466, | |
| "num_input_tokens_seen": 509104, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.50201126307321, | |
| "grad_norm": 9.220935821533203, | |
| "learning_rate": 4.903154239845798e-06, | |
| "loss": 0.1502, | |
| "num_input_tokens_seen": 515760, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.5084473049074819, | |
| "grad_norm": 5.706662654876709, | |
| "learning_rate": 4.900304113436571e-06, | |
| "loss": 0.1235, | |
| "num_input_tokens_seen": 522336, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.5148833467417538, | |
| "grad_norm": 8.74252700805664, | |
| "learning_rate": 4.897413506838103e-06, | |
| "loss": 0.1022, | |
| "num_input_tokens_seen": 528960, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.5213193885760258, | |
| "grad_norm": 4.498232841491699, | |
| "learning_rate": 4.894482468799344e-06, | |
| "loss": 0.0922, | |
| "num_input_tokens_seen": 535920, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.5277554304102977, | |
| "grad_norm": 3.6902291774749756, | |
| "learning_rate": 4.891511048751102e-06, | |
| "loss": 0.0699, | |
| "num_input_tokens_seen": 542496, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.5341914722445696, | |
| "grad_norm": 5.754522323608398, | |
| "learning_rate": 4.888499296805214e-06, | |
| "loss": 0.1057, | |
| "num_input_tokens_seen": 548752, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.5406275140788415, | |
| "grad_norm": 4.513391017913818, | |
| "learning_rate": 4.8854472637536966e-06, | |
| "loss": 0.0793, | |
| "num_input_tokens_seen": 555696, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.5470635559131134, | |
| "grad_norm": 4.931502342224121, | |
| "learning_rate": 4.882355001067892e-06, | |
| "loss": 0.14, | |
| "num_input_tokens_seen": 562192, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.5534995977473853, | |
| "grad_norm": 6.896547794342041, | |
| "learning_rate": 4.8792225608976e-06, | |
| "loss": 0.1538, | |
| "num_input_tokens_seen": 568672, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.5599356395816573, | |
| "grad_norm": 3.4364850521087646, | |
| "learning_rate": 4.8760499960702005e-06, | |
| "loss": 0.1135, | |
| "num_input_tokens_seen": 575440, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.5663716814159292, | |
| "grad_norm": 6.179934501647949, | |
| "learning_rate": 4.8728373600897535e-06, | |
| "loss": 0.1253, | |
| "num_input_tokens_seen": 581808, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.5728077232502011, | |
| "grad_norm": 14.744488716125488, | |
| "learning_rate": 4.869584707136109e-06, | |
| "loss": 0.1408, | |
| "num_input_tokens_seen": 588576, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.5792437650844731, | |
| "grad_norm": 8.414978981018066, | |
| "learning_rate": 4.8662920920639866e-06, | |
| "loss": 0.0916, | |
| "num_input_tokens_seen": 595328, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.585679806918745, | |
| "grad_norm": 3.016206979751587, | |
| "learning_rate": 4.86295957040205e-06, | |
| "loss": 0.1016, | |
| "num_input_tokens_seen": 601808, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.5921158487530169, | |
| "grad_norm": 3.084475040435791, | |
| "learning_rate": 4.8595871983519705e-06, | |
| "loss": 0.0936, | |
| "num_input_tokens_seen": 608400, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.5985518905872889, | |
| "grad_norm": 5.78838586807251, | |
| "learning_rate": 4.856175032787485e-06, | |
| "loss": 0.1557, | |
| "num_input_tokens_seen": 615296, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.6049879324215608, | |
| "grad_norm": 4.525265216827393, | |
| "learning_rate": 4.852723131253429e-06, | |
| "loss": 0.0879, | |
| "num_input_tokens_seen": 621888, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.6114239742558326, | |
| "grad_norm": 8.129231452941895, | |
| "learning_rate": 4.849231551964771e-06, | |
| "loss": 0.1399, | |
| "num_input_tokens_seen": 628768, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.6178600160901045, | |
| "grad_norm": 7.902085781097412, | |
| "learning_rate": 4.845700353805629e-06, | |
| "loss": 0.1724, | |
| "num_input_tokens_seen": 635056, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.6242960579243765, | |
| "grad_norm": 7.928036689758301, | |
| "learning_rate": 4.842129596328277e-06, | |
| "loss": 0.1018, | |
| "num_input_tokens_seen": 641872, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.6307320997586484, | |
| "grad_norm": 5.5206756591796875, | |
| "learning_rate": 4.838519339752143e-06, | |
| "loss": 0.0398, | |
| "num_input_tokens_seen": 648752, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.6371681415929203, | |
| "grad_norm": 5.910008907318115, | |
| "learning_rate": 4.834869644962789e-06, | |
| "loss": 0.1094, | |
| "num_input_tokens_seen": 655424, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.6436041834271923, | |
| "grad_norm": 17.12012481689453, | |
| "learning_rate": 4.83118057351089e-06, | |
| "loss": 0.1915, | |
| "num_input_tokens_seen": 662224, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.6500402252614642, | |
| "grad_norm": 13.876479148864746, | |
| "learning_rate": 4.827452187611192e-06, | |
| "loss": 0.1518, | |
| "num_input_tokens_seen": 668576, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.6564762670957361, | |
| "grad_norm": 9.082406044006348, | |
| "learning_rate": 4.823684550141464e-06, | |
| "loss": 0.141, | |
| "num_input_tokens_seen": 675232, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.6629123089300081, | |
| "grad_norm": 7.364652633666992, | |
| "learning_rate": 4.819877724641437e-06, | |
| "loss": 0.1564, | |
| "num_input_tokens_seen": 681856, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.66934835076428, | |
| "grad_norm": 16.54301643371582, | |
| "learning_rate": 4.8160317753117326e-06, | |
| "loss": 0.1267, | |
| "num_input_tokens_seen": 688416, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.6757843925985519, | |
| "grad_norm": 25.702648162841797, | |
| "learning_rate": 4.81214676701278e-06, | |
| "loss": 0.228, | |
| "num_input_tokens_seen": 695248, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.6822204344328238, | |
| "grad_norm": 17.066158294677734, | |
| "learning_rate": 4.808222765263724e-06, | |
| "loss": 0.1532, | |
| "num_input_tokens_seen": 701952, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.6886564762670957, | |
| "grad_norm": 11.833669662475586, | |
| "learning_rate": 4.8042598362413175e-06, | |
| "loss": 0.1482, | |
| "num_input_tokens_seen": 708368, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.6950925181013676, | |
| "grad_norm": 2.9714369773864746, | |
| "learning_rate": 4.800258046778809e-06, | |
| "loss": 0.1074, | |
| "num_input_tokens_seen": 714768, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.7015285599356396, | |
| "grad_norm": 9.384042739868164, | |
| "learning_rate": 4.796217464364808e-06, | |
| "loss": 0.1163, | |
| "num_input_tokens_seen": 721600, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.7079646017699115, | |
| "grad_norm": 12.922999382019043, | |
| "learning_rate": 4.792138157142158e-06, | |
| "loss": 0.1871, | |
| "num_input_tokens_seen": 728448, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.7144006436041834, | |
| "grad_norm": 5.947402000427246, | |
| "learning_rate": 4.788020193906776e-06, | |
| "loss": 0.092, | |
| "num_input_tokens_seen": 734720, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.7208366854384554, | |
| "grad_norm": 6.692570686340332, | |
| "learning_rate": 4.783863644106502e-06, | |
| "loss": 0.0959, | |
| "num_input_tokens_seen": 741216, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 4.1358771324157715, | |
| "learning_rate": 4.779668577839921e-06, | |
| "loss": 0.0853, | |
| "num_input_tokens_seen": 747664, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.7337087691069992, | |
| "grad_norm": 4.695752143859863, | |
| "learning_rate": 4.775435065855183e-06, | |
| "loss": 0.1581, | |
| "num_input_tokens_seen": 754480, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.7401448109412712, | |
| "grad_norm": 3.8357784748077393, | |
| "learning_rate": 4.771163179548809e-06, | |
| "loss": 0.0907, | |
| "num_input_tokens_seen": 761344, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.7465808527755431, | |
| "grad_norm": 4.455271244049072, | |
| "learning_rate": 4.766852990964492e-06, | |
| "loss": 0.0826, | |
| "num_input_tokens_seen": 768160, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.7530168946098149, | |
| "grad_norm": 3.682065963745117, | |
| "learning_rate": 4.762504572791873e-06, | |
| "loss": 0.0979, | |
| "num_input_tokens_seen": 774496, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.7594529364440868, | |
| "grad_norm": 6.100201606750488, | |
| "learning_rate": 4.7581179983653224e-06, | |
| "loss": 0.1617, | |
| "num_input_tokens_seen": 781232, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.7658889782783588, | |
| "grad_norm": 7.6822991371154785, | |
| "learning_rate": 4.753693341662702e-06, | |
| "loss": 0.1306, | |
| "num_input_tokens_seen": 788064, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.7723250201126307, | |
| "grad_norm": 4.73075008392334, | |
| "learning_rate": 4.749230677304114e-06, | |
| "loss": 0.0955, | |
| "num_input_tokens_seen": 794656, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.7787610619469026, | |
| "grad_norm": 4.435886859893799, | |
| "learning_rate": 4.7447300805506455e-06, | |
| "loss": 0.0683, | |
| "num_input_tokens_seen": 801184, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.7851971037811746, | |
| "grad_norm": 3.517606735229492, | |
| "learning_rate": 4.7401916273031e-06, | |
| "loss": 0.1117, | |
| "num_input_tokens_seen": 808000, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.7916331456154465, | |
| "grad_norm": 3.445953130722046, | |
| "learning_rate": 4.7356153941007145e-06, | |
| "loss": 0.1115, | |
| "num_input_tokens_seen": 814608, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.7980691874497184, | |
| "grad_norm": 2.4660255908966064, | |
| "learning_rate": 4.73100145811987e-06, | |
| "loss": 0.0945, | |
| "num_input_tokens_seen": 821072, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.8045052292839904, | |
| "grad_norm": 6.673710346221924, | |
| "learning_rate": 4.726349897172791e-06, | |
| "loss": 0.1125, | |
| "num_input_tokens_seen": 827840, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.8109412711182623, | |
| "grad_norm": 7.910948753356934, | |
| "learning_rate": 4.721660789706232e-06, | |
| "loss": 0.15, | |
| "num_input_tokens_seen": 834880, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.8173773129525342, | |
| "grad_norm": 4.5563154220581055, | |
| "learning_rate": 4.716934214800155e-06, | |
| "loss": 0.1015, | |
| "num_input_tokens_seen": 841360, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.8238133547868061, | |
| "grad_norm": 5.7200422286987305, | |
| "learning_rate": 4.712170252166395e-06, | |
| "loss": 0.1271, | |
| "num_input_tokens_seen": 847888, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.830249396621078, | |
| "grad_norm": 4.4525465965271, | |
| "learning_rate": 4.707368982147318e-06, | |
| "loss": 0.0762, | |
| "num_input_tokens_seen": 854896, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.8366854384553499, | |
| "grad_norm": 4.427840232849121, | |
| "learning_rate": 4.702530485714462e-06, | |
| "loss": 0.1196, | |
| "num_input_tokens_seen": 861600, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.8431214802896219, | |
| "grad_norm": 3.674197196960449, | |
| "learning_rate": 4.697654844467175e-06, | |
| "loss": 0.0866, | |
| "num_input_tokens_seen": 868272, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.8495575221238938, | |
| "grad_norm": 7.5055413246154785, | |
| "learning_rate": 4.69274214063124e-06, | |
| "loss": 0.0718, | |
| "num_input_tokens_seen": 875232, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.8559935639581657, | |
| "grad_norm": 7.523169040679932, | |
| "learning_rate": 4.687792457057482e-06, | |
| "loss": 0.0808, | |
| "num_input_tokens_seen": 882112, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.8624296057924377, | |
| "grad_norm": 10.57685375213623, | |
| "learning_rate": 4.682805877220378e-06, | |
| "loss": 0.1069, | |
| "num_input_tokens_seen": 888848, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.8688656476267096, | |
| "grad_norm": 6.235794544219971, | |
| "learning_rate": 4.677782485216644e-06, | |
| "loss": 0.0804, | |
| "num_input_tokens_seen": 895136, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.8753016894609815, | |
| "grad_norm": 5.526005268096924, | |
| "learning_rate": 4.672722365763821e-06, | |
| "loss": 0.068, | |
| "num_input_tokens_seen": 901552, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.8817377312952535, | |
| "grad_norm": 6.142871856689453, | |
| "learning_rate": 4.667625604198842e-06, | |
| "loss": 0.1193, | |
| "num_input_tokens_seen": 908272, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.8881737731295254, | |
| "grad_norm": 17.300273895263672, | |
| "learning_rate": 4.662492286476595e-06, | |
| "loss": 0.1535, | |
| "num_input_tokens_seen": 914928, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.8946098149637972, | |
| "grad_norm": 13.767914772033691, | |
| "learning_rate": 4.657322499168475e-06, | |
| "loss": 0.1303, | |
| "num_input_tokens_seen": 921296, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.9010458567980691, | |
| "grad_norm": 5.356888294219971, | |
| "learning_rate": 4.65211632946092e-06, | |
| "loss": 0.0879, | |
| "num_input_tokens_seen": 927728, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.9074818986323411, | |
| "grad_norm": 10.261467933654785, | |
| "learning_rate": 4.646873865153945e-06, | |
| "loss": 0.0986, | |
| "num_input_tokens_seen": 934240, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.913917940466613, | |
| "grad_norm": 14.075957298278809, | |
| "learning_rate": 4.641595194659657e-06, | |
| "loss": 0.1219, | |
| "num_input_tokens_seen": 940832, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.9203539823008849, | |
| "grad_norm": 11.964951515197754, | |
| "learning_rate": 4.63628040700077e-06, | |
| "loss": 0.1303, | |
| "num_input_tokens_seen": 947856, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.9267900241351569, | |
| "grad_norm": 6.297915935516357, | |
| "learning_rate": 4.630929591809095e-06, | |
| "loss": 0.081, | |
| "num_input_tokens_seen": 954160, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.9332260659694288, | |
| "grad_norm": 4.006863594055176, | |
| "learning_rate": 4.625542839324036e-06, | |
| "loss": 0.0979, | |
| "num_input_tokens_seen": 960848, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.9396621078037007, | |
| "grad_norm": 9.041242599487305, | |
| "learning_rate": 4.620120240391065e-06, | |
| "loss": 0.1446, | |
| "num_input_tokens_seen": 967440, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.9460981496379727, | |
| "grad_norm": 14.858406066894531, | |
| "learning_rate": 4.614661886460191e-06, | |
| "loss": 0.1267, | |
| "num_input_tokens_seen": 973808, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.9525341914722446, | |
| "grad_norm": 12.371238708496094, | |
| "learning_rate": 4.609167869584416e-06, | |
| "loss": 0.1095, | |
| "num_input_tokens_seen": 980352, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.9589702333065165, | |
| "grad_norm": 6.89439582824707, | |
| "learning_rate": 4.6036382824181836e-06, | |
| "loss": 0.1252, | |
| "num_input_tokens_seen": 987088, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.9654062751407884, | |
| "grad_norm": 3.6482529640197754, | |
| "learning_rate": 4.598073218215817e-06, | |
| "loss": 0.0645, | |
| "num_input_tokens_seen": 993648, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.9718423169750603, | |
| "grad_norm": 6.078918933868408, | |
| "learning_rate": 4.592472770829945e-06, | |
| "loss": 0.0974, | |
| "num_input_tokens_seen": 1000272, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.9782783588093322, | |
| "grad_norm": 10.974119186401367, | |
| "learning_rate": 4.586837034709921e-06, | |
| "loss": 0.0833, | |
| "num_input_tokens_seen": 1006912, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.9847144006436042, | |
| "grad_norm": 8.552461624145508, | |
| "learning_rate": 4.581166104900228e-06, | |
| "loss": 0.0787, | |
| "num_input_tokens_seen": 1013328, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.9911504424778761, | |
| "grad_norm": 8.927652359008789, | |
| "learning_rate": 4.575460077038877e-06, | |
| "loss": 0.0814, | |
| "num_input_tokens_seen": 1020128, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.997586484312148, | |
| "grad_norm": 2.613471269607544, | |
| "learning_rate": 4.569719047355795e-06, | |
| "loss": 0.0278, | |
| "num_input_tokens_seen": 1026848, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.00402252614642, | |
| "grad_norm": 4.19236946105957, | |
| "learning_rate": 4.5639431126712e-06, | |
| "loss": 0.093, | |
| "num_input_tokens_seen": 1033728, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 1.010458567980692, | |
| "grad_norm": 6.943019866943359, | |
| "learning_rate": 4.5581323703939685e-06, | |
| "loss": 0.073, | |
| "num_input_tokens_seen": 1040720, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 1.0168946098149638, | |
| "grad_norm": 6.545025825500488, | |
| "learning_rate": 4.552286918519996e-06, | |
| "loss": 0.0625, | |
| "num_input_tokens_seen": 1047168, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 1.0233306516492358, | |
| "grad_norm": 7.890603065490723, | |
| "learning_rate": 4.5464068556305375e-06, | |
| "loss": 0.0461, | |
| "num_input_tokens_seen": 1053760, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 1.0297666934835077, | |
| "grad_norm": 5.44887638092041, | |
| "learning_rate": 4.540492280890555e-06, | |
| "loss": 0.0318, | |
| "num_input_tokens_seen": 1060176, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.0362027353177796, | |
| "grad_norm": 1.036007285118103, | |
| "learning_rate": 4.534543294047033e-06, | |
| "loss": 0.0068, | |
| "num_input_tokens_seen": 1066768, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 1.0426387771520516, | |
| "grad_norm": 5.863292694091797, | |
| "learning_rate": 4.528559995427309e-06, | |
| "loss": 0.0462, | |
| "num_input_tokens_seen": 1073376, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 1.0490748189863235, | |
| "grad_norm": 8.744257926940918, | |
| "learning_rate": 4.522542485937369e-06, | |
| "loss": 0.0487, | |
| "num_input_tokens_seen": 1079952, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 1.0555108608205954, | |
| "grad_norm": 6.485115051269531, | |
| "learning_rate": 4.516490867060156e-06, | |
| "loss": 0.0664, | |
| "num_input_tokens_seen": 1086848, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 1.0619469026548674, | |
| "grad_norm": 3.8945565223693848, | |
| "learning_rate": 4.5104052408538545e-06, | |
| "loss": 0.0347, | |
| "num_input_tokens_seen": 1093328, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.068382944489139, | |
| "grad_norm": 3.5805532932281494, | |
| "learning_rate": 4.504285709950167e-06, | |
| "loss": 0.0202, | |
| "num_input_tokens_seen": 1099840, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 1.074818986323411, | |
| "grad_norm": 6.033172607421875, | |
| "learning_rate": 4.498132377552587e-06, | |
| "loss": 0.0573, | |
| "num_input_tokens_seen": 1106528, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 1.081255028157683, | |
| "grad_norm": 8.104386329650879, | |
| "learning_rate": 4.491945347434656e-06, | |
| "loss": 0.0848, | |
| "num_input_tokens_seen": 1113424, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 1.0876910699919549, | |
| "grad_norm": 6.130471229553223, | |
| "learning_rate": 4.485724723938215e-06, | |
| "loss": 0.0464, | |
| "num_input_tokens_seen": 1120064, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 1.0941271118262268, | |
| "grad_norm": 8.871036529541016, | |
| "learning_rate": 4.479470611971646e-06, | |
| "loss": 0.1004, | |
| "num_input_tokens_seen": 1126960, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.1005631536604987, | |
| "grad_norm": 7.802618026733398, | |
| "learning_rate": 4.473183117008096e-06, | |
| "loss": 0.0842, | |
| "num_input_tokens_seen": 1133664, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 1.1069991954947707, | |
| "grad_norm": 2.848886489868164, | |
| "learning_rate": 4.4668623450837085e-06, | |
| "loss": 0.0452, | |
| "num_input_tokens_seen": 1140048, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 1.1134352373290426, | |
| "grad_norm": 2.9371185302734375, | |
| "learning_rate": 4.460508402795827e-06, | |
| "loss": 0.0225, | |
| "num_input_tokens_seen": 1146448, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 1.1198712791633145, | |
| "grad_norm": 2.1428751945495605, | |
| "learning_rate": 4.4541213973012005e-06, | |
| "loss": 0.0058, | |
| "num_input_tokens_seen": 1152960, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 1.1263073209975865, | |
| "grad_norm": 6.480560302734375, | |
| "learning_rate": 4.447701436314176e-06, | |
| "loss": 0.0565, | |
| "num_input_tokens_seen": 1159632, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.1327433628318584, | |
| "grad_norm": 8.678375244140625, | |
| "learning_rate": 4.441248628104884e-06, | |
| "loss": 0.0591, | |
| "num_input_tokens_seen": 1166640, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 1.1391794046661303, | |
| "grad_norm": 8.184906005859375, | |
| "learning_rate": 4.434763081497407e-06, | |
| "loss": 0.0488, | |
| "num_input_tokens_seen": 1173408, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 1.1456154465004023, | |
| "grad_norm": 3.727961540222168, | |
| "learning_rate": 4.428244905867952e-06, | |
| "loss": 0.0318, | |
| "num_input_tokens_seen": 1179776, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 1.1520514883346742, | |
| "grad_norm": 7.119325160980225, | |
| "learning_rate": 4.421694211142998e-06, | |
| "loss": 0.064, | |
| "num_input_tokens_seen": 1186720, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 1.1584875301689461, | |
| "grad_norm": 3.094886541366577, | |
| "learning_rate": 4.415111107797445e-06, | |
| "loss": 0.0465, | |
| "num_input_tokens_seen": 1193408, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.164923572003218, | |
| "grad_norm": 5.577038288116455, | |
| "learning_rate": 4.408495706852758e-06, | |
| "loss": 0.0344, | |
| "num_input_tokens_seen": 1200096, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 1.17135961383749, | |
| "grad_norm": 7.607036590576172, | |
| "learning_rate": 4.401848119875081e-06, | |
| "loss": 0.0747, | |
| "num_input_tokens_seen": 1206848, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 1.177795655671762, | |
| "grad_norm": 5.953075885772705, | |
| "learning_rate": 4.395168458973368e-06, | |
| "loss": 0.073, | |
| "num_input_tokens_seen": 1213632, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 1.1842316975060339, | |
| "grad_norm": 7.784894943237305, | |
| "learning_rate": 4.388456836797484e-06, | |
| "loss": 0.0652, | |
| "num_input_tokens_seen": 1220336, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 1.1906677393403058, | |
| "grad_norm": 6.535793781280518, | |
| "learning_rate": 4.381713366536312e-06, | |
| "loss": 0.0881, | |
| "num_input_tokens_seen": 1226736, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.1971037811745777, | |
| "grad_norm": 4.9065093994140625, | |
| "learning_rate": 4.374938161915835e-06, | |
| "loss": 0.0676, | |
| "num_input_tokens_seen": 1233536, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 1.2035398230088497, | |
| "grad_norm": 5.7732648849487305, | |
| "learning_rate": 4.368131337197228e-06, | |
| "loss": 0.0481, | |
| "num_input_tokens_seen": 1240032, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 1.2099758648431216, | |
| "grad_norm": 5.656060218811035, | |
| "learning_rate": 4.361293007174926e-06, | |
| "loss": 0.0477, | |
| "num_input_tokens_seen": 1247008, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 1.2164119066773935, | |
| "grad_norm": 3.2787587642669678, | |
| "learning_rate": 4.354423287174686e-06, | |
| "loss": 0.0456, | |
| "num_input_tokens_seen": 1254032, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 1.2228479485116655, | |
| "grad_norm": 9.526626586914062, | |
| "learning_rate": 4.3475222930516484e-06, | |
| "loss": 0.133, | |
| "num_input_tokens_seen": 1261104, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.2292839903459372, | |
| "grad_norm": 4.272536277770996, | |
| "learning_rate": 4.340590141188377e-06, | |
| "loss": 0.0672, | |
| "num_input_tokens_seen": 1267680, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.235720032180209, | |
| "grad_norm": 4.5576701164245605, | |
| "learning_rate": 4.333626948492898e-06, | |
| "loss": 0.0352, | |
| "num_input_tokens_seen": 1274112, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.242156074014481, | |
| "grad_norm": 2.7765443325042725, | |
| "learning_rate": 4.326632832396733e-06, | |
| "loss": 0.0361, | |
| "num_input_tokens_seen": 1280528, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.248592115848753, | |
| "grad_norm": 2.681631565093994, | |
| "learning_rate": 4.319607910852911e-06, | |
| "loss": 0.0432, | |
| "num_input_tokens_seen": 1287232, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.255028157683025, | |
| "grad_norm": 7.467050075531006, | |
| "learning_rate": 4.3125523023339825e-06, | |
| "loss": 0.0398, | |
| "num_input_tokens_seen": 1293792, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.2614641995172968, | |
| "grad_norm": 2.6330530643463135, | |
| "learning_rate": 4.305466125830023e-06, | |
| "loss": 0.0473, | |
| "num_input_tokens_seen": 1300624, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.2679002413515688, | |
| "grad_norm": 5.228641510009766, | |
| "learning_rate": 4.2983495008466285e-06, | |
| "loss": 0.0394, | |
| "num_input_tokens_seen": 1307520, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.2743362831858407, | |
| "grad_norm": 5.0004191398620605, | |
| "learning_rate": 4.29120254740289e-06, | |
| "loss": 0.0645, | |
| "num_input_tokens_seen": 1313824, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.2807723250201126, | |
| "grad_norm": 5.468844890594482, | |
| "learning_rate": 4.284025386029381e-06, | |
| "loss": 0.0663, | |
| "num_input_tokens_seen": 1319952, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.2872083668543846, | |
| "grad_norm": 6.145412921905518, | |
| "learning_rate": 4.276818137766118e-06, | |
| "loss": 0.1067, | |
| "num_input_tokens_seen": 1326352, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.2936444086886565, | |
| "grad_norm": 5.632473945617676, | |
| "learning_rate": 4.269580924160523e-06, | |
| "loss": 0.0603, | |
| "num_input_tokens_seen": 1332912, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.3000804505229284, | |
| "grad_norm": 1.326751947402954, | |
| "learning_rate": 4.262313867265369e-06, | |
| "loss": 0.0055, | |
| "num_input_tokens_seen": 1339872, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.3065164923572004, | |
| "grad_norm": 6.162146091461182, | |
| "learning_rate": 4.255017089636725e-06, | |
| "loss": 0.0547, | |
| "num_input_tokens_seen": 1346240, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.3129525341914723, | |
| "grad_norm": 2.500483989715576, | |
| "learning_rate": 4.24769071433189e-06, | |
| "loss": 0.0684, | |
| "num_input_tokens_seen": 1353104, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.3193885760257442, | |
| "grad_norm": 5.962297439575195, | |
| "learning_rate": 4.240334864907317e-06, | |
| "loss": 0.0484, | |
| "num_input_tokens_seen": 1359664, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.3258246178600162, | |
| "grad_norm": 4.183216571807861, | |
| "learning_rate": 4.232949665416526e-06, | |
| "loss": 0.0225, | |
| "num_input_tokens_seen": 1366112, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.332260659694288, | |
| "grad_norm": 2.874197244644165, | |
| "learning_rate": 4.225535240408014e-06, | |
| "loss": 0.0278, | |
| "num_input_tokens_seen": 1372912, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.33869670152856, | |
| "grad_norm": 5.831293106079102, | |
| "learning_rate": 4.218091714923157e-06, | |
| "loss": 0.034, | |
| "num_input_tokens_seen": 1379200, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.3451327433628317, | |
| "grad_norm": 4.147435665130615, | |
| "learning_rate": 4.210619214494099e-06, | |
| "loss": 0.0453, | |
| "num_input_tokens_seen": 1385520, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.3515687851971037, | |
| "grad_norm": 6.03895378112793, | |
| "learning_rate": 4.203117865141635e-06, | |
| "loss": 0.0564, | |
| "num_input_tokens_seen": 1391968, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.3580048270313756, | |
| "grad_norm": 4.196593284606934, | |
| "learning_rate": 4.195587793373085e-06, | |
| "loss": 0.0318, | |
| "num_input_tokens_seen": 1398576, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.3644408688656475, | |
| "grad_norm": 6.364063739776611, | |
| "learning_rate": 4.188029126180161e-06, | |
| "loss": 0.0575, | |
| "num_input_tokens_seen": 1405280, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.3708769106999195, | |
| "grad_norm": 5.420915603637695, | |
| "learning_rate": 4.180441991036827e-06, | |
| "loss": 0.0448, | |
| "num_input_tokens_seen": 1411968, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.3773129525341914, | |
| "grad_norm": 5.313647747039795, | |
| "learning_rate": 4.172826515897146e-06, | |
| "loss": 0.0493, | |
| "num_input_tokens_seen": 1418576, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.3837489943684633, | |
| "grad_norm": 3.2573652267456055, | |
| "learning_rate": 4.165182829193126e-06, | |
| "loss": 0.0478, | |
| "num_input_tokens_seen": 1425360, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.3901850362027353, | |
| "grad_norm": 4.227644443511963, | |
| "learning_rate": 4.15751105983255e-06, | |
| "loss": 0.0887, | |
| "num_input_tokens_seen": 1432144, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.3966210780370072, | |
| "grad_norm": 6.514432907104492, | |
| "learning_rate": 4.149811337196808e-06, | |
| "loss": 0.0634, | |
| "num_input_tokens_seen": 1439200, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.4030571198712791, | |
| "grad_norm": 3.3998050689697266, | |
| "learning_rate": 4.142083791138703e-06, | |
| "loss": 0.0349, | |
| "num_input_tokens_seen": 1445728, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.409493161705551, | |
| "grad_norm": 5.725708961486816, | |
| "learning_rate": 4.134328551980279e-06, | |
| "loss": 0.0459, | |
| "num_input_tokens_seen": 1452384, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.415929203539823, | |
| "grad_norm": 3.3524420261383057, | |
| "learning_rate": 4.126545750510605e-06, | |
| "loss": 0.0304, | |
| "num_input_tokens_seen": 1459136, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.422365245374095, | |
| "grad_norm": 9.169583320617676, | |
| "learning_rate": 4.118735517983584e-06, | |
| "loss": 0.0658, | |
| "num_input_tokens_seen": 1465632, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.4288012872083669, | |
| "grad_norm": 5.310299873352051, | |
| "learning_rate": 4.110897986115729e-06, | |
| "loss": 0.087, | |
| "num_input_tokens_seen": 1472592, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.4352373290426388, | |
| "grad_norm": 4.850796222686768, | |
| "learning_rate": 4.1030332870839466e-06, | |
| "loss": 0.0952, | |
| "num_input_tokens_seen": 1479168, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.4416733708769107, | |
| "grad_norm": 5.20851993560791, | |
| "learning_rate": 4.0951415535233065e-06, | |
| "loss": 0.0358, | |
| "num_input_tokens_seen": 1485664, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.4481094127111827, | |
| "grad_norm": 4.104648113250732, | |
| "learning_rate": 4.087222918524807e-06, | |
| "loss": 0.0527, | |
| "num_input_tokens_seen": 1492368, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.4545454545454546, | |
| "grad_norm": 2.5263378620147705, | |
| "learning_rate": 4.079277515633127e-06, | |
| "loss": 0.0452, | |
| "num_input_tokens_seen": 1498752, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.4609814963797265, | |
| "grad_norm": 2.5317678451538086, | |
| "learning_rate": 4.0713054788443776e-06, | |
| "loss": 0.0313, | |
| "num_input_tokens_seen": 1505296, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.4674175382139985, | |
| "grad_norm": 5.61666202545166, | |
| "learning_rate": 4.063306942603835e-06, | |
| "loss": 0.0544, | |
| "num_input_tokens_seen": 1511584, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.4738535800482704, | |
| "grad_norm": 3.4415996074676514, | |
| "learning_rate": 4.0552820418036855e-06, | |
| "loss": 0.0428, | |
| "num_input_tokens_seen": 1517776, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.4802896218825423, | |
| "grad_norm": 2.44814395904541, | |
| "learning_rate": 4.0472309117807365e-06, | |
| "loss": 0.0183, | |
| "num_input_tokens_seen": 1524416, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.4867256637168142, | |
| "grad_norm": 3.827312469482422, | |
| "learning_rate": 4.039153688314146e-06, | |
| "loss": 0.0662, | |
| "num_input_tokens_seen": 1530864, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.4931617055510862, | |
| "grad_norm": 2.9351532459259033, | |
| "learning_rate": 4.031050507623125e-06, | |
| "loss": 0.0258, | |
| "num_input_tokens_seen": 1537216, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.4995977473853581, | |
| "grad_norm": 6.040038585662842, | |
| "learning_rate": 4.022921506364644e-06, | |
| "loss": 0.0584, | |
| "num_input_tokens_seen": 1543824, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.50603378921963, | |
| "grad_norm": 2.7363831996917725, | |
| "learning_rate": 4.014766821631128e-06, | |
| "loss": 0.0916, | |
| "num_input_tokens_seen": 1550432, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.512469831053902, | |
| "grad_norm": 4.466485977172852, | |
| "learning_rate": 4.006586590948141e-06, | |
| "loss": 0.0397, | |
| "num_input_tokens_seen": 1556912, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.518905872888174, | |
| "grad_norm": 4.36499547958374, | |
| "learning_rate": 3.998380952272073e-06, | |
| "loss": 0.0405, | |
| "num_input_tokens_seen": 1563456, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.5253419147224458, | |
| "grad_norm": 3.1068978309631348, | |
| "learning_rate": 3.990150043987806e-06, | |
| "loss": 0.0645, | |
| "num_input_tokens_seen": 1570240, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.5317779565567178, | |
| "grad_norm": 4.554339408874512, | |
| "learning_rate": 3.981894004906388e-06, | |
| "loss": 0.0389, | |
| "num_input_tokens_seen": 1576896, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.5382139983909895, | |
| "grad_norm": 2.1207427978515625, | |
| "learning_rate": 3.973612974262685e-06, | |
| "loss": 0.0341, | |
| "num_input_tokens_seen": 1583440, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.5446500402252614, | |
| "grad_norm": 4.71979284286499, | |
| "learning_rate": 3.965307091713037e-06, | |
| "loss": 0.0625, | |
| "num_input_tokens_seen": 1589968, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.5510860820595334, | |
| "grad_norm": 3.9797351360321045, | |
| "learning_rate": 3.956976497332903e-06, | |
| "loss": 0.0651, | |
| "num_input_tokens_seen": 1596416, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.5575221238938053, | |
| "grad_norm": 4.844697952270508, | |
| "learning_rate": 3.948621331614495e-06, | |
| "loss": 0.0391, | |
| "num_input_tokens_seen": 1602944, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.5639581657280772, | |
| "grad_norm": 4.572307109832764, | |
| "learning_rate": 3.9402417354644115e-06, | |
| "loss": 0.0486, | |
| "num_input_tokens_seen": 1609632, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.5703942075623492, | |
| "grad_norm": 7.0537309646606445, | |
| "learning_rate": 3.9318378502012636e-06, | |
| "loss": 0.1192, | |
| "num_input_tokens_seen": 1616096, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.576830249396621, | |
| "grad_norm": 4.42478609085083, | |
| "learning_rate": 3.923409817553284e-06, | |
| "loss": 0.0679, | |
| "num_input_tokens_seen": 1622848, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.583266291230893, | |
| "grad_norm": 5.157562255859375, | |
| "learning_rate": 3.914957779655946e-06, | |
| "loss": 0.0493, | |
| "num_input_tokens_seen": 1629600, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.589702333065165, | |
| "grad_norm": 2.8394153118133545, | |
| "learning_rate": 3.906481879049559e-06, | |
| "loss": 0.0456, | |
| "num_input_tokens_seen": 1636192, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.5961383748994369, | |
| "grad_norm": 2.4742684364318848, | |
| "learning_rate": 3.897982258676867e-06, | |
| "loss": 0.0391, | |
| "num_input_tokens_seen": 1642832, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.6025744167337088, | |
| "grad_norm": 4.165124893188477, | |
| "learning_rate": 3.8894590618806435e-06, | |
| "loss": 0.0501, | |
| "num_input_tokens_seen": 1649904, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.6090104585679805, | |
| "grad_norm": 2.7913286685943604, | |
| "learning_rate": 3.880912432401265e-06, | |
| "loss": 0.0397, | |
| "num_input_tokens_seen": 1656704, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.6154465004022525, | |
| "grad_norm": 4.8400397300720215, | |
| "learning_rate": 3.872342514374291e-06, | |
| "loss": 0.0846, | |
| "num_input_tokens_seen": 1663680, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.6218825422365244, | |
| "grad_norm": 3.111396074295044, | |
| "learning_rate": 3.863749452328035e-06, | |
| "loss": 0.0443, | |
| "num_input_tokens_seen": 1670160, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.6283185840707963, | |
| "grad_norm": 3.1794304847717285, | |
| "learning_rate": 3.855133391181124e-06, | |
| "loss": 0.045, | |
| "num_input_tokens_seen": 1676832, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.6347546259050683, | |
| "grad_norm": 1.6655223369598389, | |
| "learning_rate": 3.846494476240057e-06, | |
| "loss": 0.0172, | |
| "num_input_tokens_seen": 1683664, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.6411906677393402, | |
| "grad_norm": 4.251989841461182, | |
| "learning_rate": 3.837832853196751e-06, | |
| "loss": 0.0949, | |
| "num_input_tokens_seen": 1690208, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.6476267095736121, | |
| "grad_norm": 7.070593357086182, | |
| "learning_rate": 3.8291486681260904e-06, | |
| "loss": 0.0277, | |
| "num_input_tokens_seen": 1697296, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.654062751407884, | |
| "grad_norm": 2.8217155933380127, | |
| "learning_rate": 3.820442067483455e-06, | |
| "loss": 0.0247, | |
| "num_input_tokens_seen": 1703504, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.660498793242156, | |
| "grad_norm": 5.125271320343018, | |
| "learning_rate": 3.811713198102258e-06, | |
| "loss": 0.0549, | |
| "num_input_tokens_seen": 1710016, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.666934835076428, | |
| "grad_norm": 5.227617263793945, | |
| "learning_rate": 3.802962207191463e-06, | |
| "loss": 0.0342, | |
| "num_input_tokens_seen": 1716960, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.6733708769106999, | |
| "grad_norm": 3.3697738647460938, | |
| "learning_rate": 3.794189242333107e-06, | |
| "loss": 0.0617, | |
| "num_input_tokens_seen": 1723504, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.6798069187449718, | |
| "grad_norm": 2.9104015827178955, | |
| "learning_rate": 3.785394451479806e-06, | |
| "loss": 0.0675, | |
| "num_input_tokens_seen": 1730160, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.6862429605792437, | |
| "grad_norm": 4.513949394226074, | |
| "learning_rate": 3.7765779829522674e-06, | |
| "loss": 0.1055, | |
| "num_input_tokens_seen": 1736752, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.6926790024135157, | |
| "grad_norm": 3.0852975845336914, | |
| "learning_rate": 3.7677399854367815e-06, | |
| "loss": 0.0355, | |
| "num_input_tokens_seen": 1743328, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.6991150442477876, | |
| "grad_norm": 3.222297191619873, | |
| "learning_rate": 3.7588806079827147e-06, | |
| "loss": 0.0622, | |
| "num_input_tokens_seen": 1749776, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.7055510860820595, | |
| "grad_norm": 2.017244338989258, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 0.0297, | |
| "num_input_tokens_seen": 1756512, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.7119871279163315, | |
| "grad_norm": 2.465116262435913, | |
| "learning_rate": 3.7410983112566166e-06, | |
| "loss": 0.0312, | |
| "num_input_tokens_seen": 1762928, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.7184231697506034, | |
| "grad_norm": 2.8471832275390625, | |
| "learning_rate": 3.7321756918760587e-06, | |
| "loss": 0.0811, | |
| "num_input_tokens_seen": 1769392, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.7248592115848753, | |
| "grad_norm": 3.4750540256500244, | |
| "learning_rate": 3.7232322923348093e-06, | |
| "loss": 0.067, | |
| "num_input_tokens_seen": 1776032, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.7312952534191473, | |
| "grad_norm": 2.845557928085327, | |
| "learning_rate": 3.7142682634598016e-06, | |
| "loss": 0.0553, | |
| "num_input_tokens_seen": 1782512, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.7377312952534192, | |
| "grad_norm": 2.0945403575897217, | |
| "learning_rate": 3.7052837564258728e-06, | |
| "loss": 0.021, | |
| "num_input_tokens_seen": 1789280, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.7441673370876911, | |
| "grad_norm": 2.614729642868042, | |
| "learning_rate": 3.6962789227532165e-06, | |
| "loss": 0.0589, | |
| "num_input_tokens_seen": 1795696, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.750603378921963, | |
| "grad_norm": 3.331339120864868, | |
| "learning_rate": 3.6872539143048287e-06, | |
| "loss": 0.0521, | |
| "num_input_tokens_seen": 1802448, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.757039420756235, | |
| "grad_norm": 2.845620632171631, | |
| "learning_rate": 3.6782088832839436e-06, | |
| "loss": 0.0402, | |
| "num_input_tokens_seen": 1809264, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.763475462590507, | |
| "grad_norm": 3.3971211910247803, | |
| "learning_rate": 3.6691439822314672e-06, | |
| "loss": 0.0363, | |
| "num_input_tokens_seen": 1815808, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.7699115044247788, | |
| "grad_norm": 5.249027729034424, | |
| "learning_rate": 3.660059364023409e-06, | |
| "loss": 0.0523, | |
| "num_input_tokens_seen": 1822352, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.7763475462590508, | |
| "grad_norm": 3.6546497344970703, | |
| "learning_rate": 3.650955181868298e-06, | |
| "loss": 0.0255, | |
| "num_input_tokens_seen": 1829056, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.7827835880933227, | |
| "grad_norm": 7.767543792724609, | |
| "learning_rate": 3.641831589304602e-06, | |
| "loss": 0.1031, | |
| "num_input_tokens_seen": 1835696, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.7892196299275946, | |
| "grad_norm": 1.5550068616867065, | |
| "learning_rate": 3.6326887401981386e-06, | |
| "loss": 0.0452, | |
| "num_input_tokens_seen": 1842288, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.7956556717618666, | |
| "grad_norm": 4.8318986892700195, | |
| "learning_rate": 3.6235267887394774e-06, | |
| "loss": 0.0537, | |
| "num_input_tokens_seen": 1848960, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.8020917135961385, | |
| "grad_norm": 4.691814422607422, | |
| "learning_rate": 3.6143458894413463e-06, | |
| "loss": 0.0572, | |
| "num_input_tokens_seen": 1855648, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.8085277554304104, | |
| "grad_norm": 2.6937472820281982, | |
| "learning_rate": 3.6051461971360146e-06, | |
| "loss": 0.0298, | |
| "num_input_tokens_seen": 1862160, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.8149637972646824, | |
| "grad_norm": 4.052839279174805, | |
| "learning_rate": 3.595927866972694e-06, | |
| "loss": 0.037, | |
| "num_input_tokens_seen": 1868896, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.8213998390989543, | |
| "grad_norm": 5.030338287353516, | |
| "learning_rate": 3.586691054414913e-06, | |
| "loss": 0.0783, | |
| "num_input_tokens_seen": 1875248, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.827835880933226, | |
| "grad_norm": 1.9826079607009888, | |
| "learning_rate": 3.577435915237899e-06, | |
| "loss": 0.0436, | |
| "num_input_tokens_seen": 1881728, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.834271922767498, | |
| "grad_norm": 1.8905837535858154, | |
| "learning_rate": 3.5681626055259526e-06, | |
| "loss": 0.0258, | |
| "num_input_tokens_seen": 1888384, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.8407079646017699, | |
| "grad_norm": 1.9678194522857666, | |
| "learning_rate": 3.558871281669811e-06, | |
| "loss": 0.0235, | |
| "num_input_tokens_seen": 1894864, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.8471440064360418, | |
| "grad_norm": 4.199605464935303, | |
| "learning_rate": 3.549562100364014e-06, | |
| "loss": 0.0541, | |
| "num_input_tokens_seen": 1901680, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.8535800482703138, | |
| "grad_norm": 4.100510120391846, | |
| "learning_rate": 3.5402352186042602e-06, | |
| "loss": 0.0767, | |
| "num_input_tokens_seen": 1908304, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.8600160901045857, | |
| "grad_norm": 6.471580982208252, | |
| "learning_rate": 3.530890793684759e-06, | |
| "loss": 0.0558, | |
| "num_input_tokens_seen": 1914736, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.8664521319388576, | |
| "grad_norm": 6.2181525230407715, | |
| "learning_rate": 3.521528983195579e-06, | |
| "loss": 0.0483, | |
| "num_input_tokens_seen": 1921088, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.8728881737731295, | |
| "grad_norm": 3.5814297199249268, | |
| "learning_rate": 3.512149945019989e-06, | |
| "loss": 0.0389, | |
| "num_input_tokens_seen": 1927408, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.8793242156074015, | |
| "grad_norm": 3.193094491958618, | |
| "learning_rate": 3.502753837331797e-06, | |
| "loss": 0.034, | |
| "num_input_tokens_seen": 1934160, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.8857602574416734, | |
| "grad_norm": 3.2676048278808594, | |
| "learning_rate": 3.4933408185926805e-06, | |
| "loss": 0.0921, | |
| "num_input_tokens_seen": 1940912, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.8921962992759453, | |
| "grad_norm": 4.060972690582275, | |
| "learning_rate": 3.4839110475495153e-06, | |
| "loss": 0.0661, | |
| "num_input_tokens_seen": 1947488, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.898632341110217, | |
| "grad_norm": 4.40585470199585, | |
| "learning_rate": 3.4744646832316985e-06, | |
| "loss": 0.0301, | |
| "num_input_tokens_seen": 1954000, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.905068382944489, | |
| "grad_norm": 4.472731113433838, | |
| "learning_rate": 3.465001884948468e-06, | |
| "loss": 0.0878, | |
| "num_input_tokens_seen": 1960400, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.911504424778761, | |
| "grad_norm": 3.2221555709838867, | |
| "learning_rate": 3.45552281228621e-06, | |
| "loss": 0.1126, | |
| "num_input_tokens_seen": 1967728, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.9179404666130329, | |
| "grad_norm": 3.6210269927978516, | |
| "learning_rate": 3.446027625105776e-06, | |
| "loss": 0.0679, | |
| "num_input_tokens_seen": 1974096, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.9243765084473048, | |
| "grad_norm": 2.038454055786133, | |
| "learning_rate": 3.436516483539781e-06, | |
| "loss": 0.031, | |
| "num_input_tokens_seen": 1980672, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.9308125502815767, | |
| "grad_norm": 2.2427828311920166, | |
| "learning_rate": 3.4269895479899023e-06, | |
| "loss": 0.0687, | |
| "num_input_tokens_seen": 1987104, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.9372485921158487, | |
| "grad_norm": 6.37827730178833, | |
| "learning_rate": 3.4174469791241805e-06, | |
| "loss": 0.0497, | |
| "num_input_tokens_seen": 1994064, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.9436846339501206, | |
| "grad_norm": 9.542262077331543, | |
| "learning_rate": 3.4078889378743036e-06, | |
| "loss": 0.0829, | |
| "num_input_tokens_seen": 2001056, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.9501206757843925, | |
| "grad_norm": 6.237174034118652, | |
| "learning_rate": 3.3983155854328942e-06, | |
| "loss": 0.0578, | |
| "num_input_tokens_seen": 2007712, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.9565567176186645, | |
| "grad_norm": 2.3653266429901123, | |
| "learning_rate": 3.388727083250795e-06, | |
| "loss": 0.0398, | |
| "num_input_tokens_seen": 2014368, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.9629927594529364, | |
| "grad_norm": 3.9448723793029785, | |
| "learning_rate": 3.379123593034342e-06, | |
| "loss": 0.0754, | |
| "num_input_tokens_seen": 2020592, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.9694288012872083, | |
| "grad_norm": 2.1158804893493652, | |
| "learning_rate": 3.369505276742638e-06, | |
| "loss": 0.0395, | |
| "num_input_tokens_seen": 2026864, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.9758648431214803, | |
| "grad_norm": 5.131661891937256, | |
| "learning_rate": 3.359872296584821e-06, | |
| "loss": 0.0575, | |
| "num_input_tokens_seen": 2033440, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.9823008849557522, | |
| "grad_norm": 5.0783867835998535, | |
| "learning_rate": 3.350224815017331e-06, | |
| "loss": 0.0472, | |
| "num_input_tokens_seen": 2039712, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.9887369267900241, | |
| "grad_norm": 6.688424587249756, | |
| "learning_rate": 3.3405629947411687e-06, | |
| "loss": 0.0498, | |
| "num_input_tokens_seen": 2046576, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.995172968624296, | |
| "grad_norm": 5.24268102645874, | |
| "learning_rate": 3.3308869986991493e-06, | |
| "loss": 0.0447, | |
| "num_input_tokens_seen": 2053248, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.001609010458568, | |
| "grad_norm": 1.7300570011138916, | |
| "learning_rate": 3.32119699007316e-06, | |
| "loss": 0.0155, | |
| "num_input_tokens_seen": 2059840, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 2.00804505229284, | |
| "grad_norm": 2.5391845703125, | |
| "learning_rate": 3.311493132281402e-06, | |
| "loss": 0.0183, | |
| "num_input_tokens_seen": 2066384, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 2.014481094127112, | |
| "grad_norm": 1.9404152631759644, | |
| "learning_rate": 3.3017755889756382e-06, | |
| "loss": 0.0102, | |
| "num_input_tokens_seen": 2073088, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 2.020917135961384, | |
| "grad_norm": 1.1497960090637207, | |
| "learning_rate": 3.292044524038433e-06, | |
| "loss": 0.0119, | |
| "num_input_tokens_seen": 2079600, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 2.0273531777956557, | |
| "grad_norm": 0.6188907027244568, | |
| "learning_rate": 3.2823001015803863e-06, | |
| "loss": 0.0037, | |
| "num_input_tokens_seen": 2086080, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 2.0337892196299276, | |
| "grad_norm": 2.5652434825897217, | |
| "learning_rate": 3.272542485937369e-06, | |
| "loss": 0.0048, | |
| "num_input_tokens_seen": 2092768, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 2.0402252614641996, | |
| "grad_norm": 1.3636257648468018, | |
| "learning_rate": 3.2627718416677484e-06, | |
| "loss": 0.004, | |
| "num_input_tokens_seen": 2099296, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 2.0466613032984715, | |
| "grad_norm": 3.7406702041625977, | |
| "learning_rate": 3.2529883335496163e-06, | |
| "loss": 0.0472, | |
| "num_input_tokens_seen": 2106176, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 2.0530973451327434, | |
| "grad_norm": 0.2876489460468292, | |
| "learning_rate": 3.243192126578007e-06, | |
| "loss": 0.0008, | |
| "num_input_tokens_seen": 2112560, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 2.0595333869670154, | |
| "grad_norm": 3.388899087905884, | |
| "learning_rate": 3.2333833859621155e-06, | |
| "loss": 0.0332, | |
| "num_input_tokens_seen": 2119296, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.0659694288012873, | |
| "grad_norm": 2.6212401390075684, | |
| "learning_rate": 3.223562277122513e-06, | |
| "loss": 0.0434, | |
| "num_input_tokens_seen": 2125632, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 2.0724054706355592, | |
| "grad_norm": 3.6854021549224854, | |
| "learning_rate": 3.213728965688356e-06, | |
| "loss": 0.0105, | |
| "num_input_tokens_seen": 2132096, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 2.078841512469831, | |
| "grad_norm": 3.9269893169403076, | |
| "learning_rate": 3.2038836174945907e-06, | |
| "loss": 0.0188, | |
| "num_input_tokens_seen": 2138336, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 2.085277554304103, | |
| "grad_norm": 2.3363194465637207, | |
| "learning_rate": 3.194026398579162e-06, | |
| "loss": 0.0382, | |
| "num_input_tokens_seen": 2144672, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 2.091713596138375, | |
| "grad_norm": 0.16176919639110565, | |
| "learning_rate": 3.184157475180208e-06, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 2151216, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 2.098149637972647, | |
| "grad_norm": 7.4007368087768555, | |
| "learning_rate": 3.1742770137332567e-06, | |
| "loss": 0.0473, | |
| "num_input_tokens_seen": 2158000, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 2.104585679806919, | |
| "grad_norm": 0.3990660607814789, | |
| "learning_rate": 3.164385180868425e-06, | |
| "loss": 0.0008, | |
| "num_input_tokens_seen": 2164448, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 2.111021721641191, | |
| "grad_norm": 5.447741508483887, | |
| "learning_rate": 3.1544821434076013e-06, | |
| "loss": 0.0123, | |
| "num_input_tokens_seen": 2171120, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 2.1174577634754628, | |
| "grad_norm": 4.229776382446289, | |
| "learning_rate": 3.144568068361634e-06, | |
| "loss": 0.03, | |
| "num_input_tokens_seen": 2177648, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 2.1238938053097347, | |
| "grad_norm": 5.920961380004883, | |
| "learning_rate": 3.1346431229275197e-06, | |
| "loss": 0.0207, | |
| "num_input_tokens_seen": 2183856, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.1303298471440066, | |
| "grad_norm": 11.779773712158203, | |
| "learning_rate": 3.124707474485577e-06, | |
| "loss": 0.0172, | |
| "num_input_tokens_seen": 2190608, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 2.136765888978278, | |
| "grad_norm": 8.82557201385498, | |
| "learning_rate": 3.1147612905966286e-06, | |
| "loss": 0.0115, | |
| "num_input_tokens_seen": 2197232, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 2.14320193081255, | |
| "grad_norm": 1.1176470518112183, | |
| "learning_rate": 3.1048047389991693e-06, | |
| "loss": 0.0217, | |
| "num_input_tokens_seen": 2203456, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 2.149637972646822, | |
| "grad_norm": 0.8806192278862, | |
| "learning_rate": 3.094837987606547e-06, | |
| "loss": 0.0035, | |
| "num_input_tokens_seen": 2209856, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 2.156074014481094, | |
| "grad_norm": 6.793837547302246, | |
| "learning_rate": 3.084861204504122e-06, | |
| "loss": 0.0426, | |
| "num_input_tokens_seen": 2216400, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 2.162510056315366, | |
| "grad_norm": 5.5410075187683105, | |
| "learning_rate": 3.0748745579464347e-06, | |
| "loss": 0.0382, | |
| "num_input_tokens_seen": 2222864, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 2.168946098149638, | |
| "grad_norm": 1.8118884563446045, | |
| "learning_rate": 3.0648782163543696e-06, | |
| "loss": 0.0082, | |
| "num_input_tokens_seen": 2229760, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 2.1753821399839097, | |
| "grad_norm": 2.607206106185913, | |
| "learning_rate": 3.0548723483123157e-06, | |
| "loss": 0.0338, | |
| "num_input_tokens_seen": 2236368, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 2.1818181818181817, | |
| "grad_norm": 1.080344557762146, | |
| "learning_rate": 3.0448571225653195e-06, | |
| "loss": 0.0141, | |
| "num_input_tokens_seen": 2242816, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 2.1882542236524536, | |
| "grad_norm": 2.380739212036133, | |
| "learning_rate": 3.0348327080162438e-06, | |
| "loss": 0.0287, | |
| "num_input_tokens_seen": 2249488, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.1946902654867255, | |
| "grad_norm": 1.0098868608474731, | |
| "learning_rate": 3.0247992737229147e-06, | |
| "loss": 0.0027, | |
| "num_input_tokens_seen": 2255968, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 2.2011263073209975, | |
| "grad_norm": 0.7940512895584106, | |
| "learning_rate": 3.014756988895275e-06, | |
| "loss": 0.0026, | |
| "num_input_tokens_seen": 2262544, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 2.2075623491552694, | |
| "grad_norm": 2.9759926795959473, | |
| "learning_rate": 3.0047060228925256e-06, | |
| "loss": 0.039, | |
| "num_input_tokens_seen": 2269312, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 2.2139983909895413, | |
| "grad_norm": 4.84032678604126, | |
| "learning_rate": 2.994646545220275e-06, | |
| "loss": 0.0154, | |
| "num_input_tokens_seen": 2275968, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 2.2204344328238133, | |
| "grad_norm": 2.9671568870544434, | |
| "learning_rate": 2.9845787255276753e-06, | |
| "loss": 0.0231, | |
| "num_input_tokens_seen": 2282976, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 2.226870474658085, | |
| "grad_norm": 5.410647392272949, | |
| "learning_rate": 2.9745027336045652e-06, | |
| "loss": 0.04, | |
| "num_input_tokens_seen": 2289696, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 2.233306516492357, | |
| "grad_norm": 5.828602313995361, | |
| "learning_rate": 2.964418739378603e-06, | |
| "loss": 0.0282, | |
| "num_input_tokens_seen": 2296272, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 2.239742558326629, | |
| "grad_norm": 1.9481452703475952, | |
| "learning_rate": 2.954326912912404e-06, | |
| "loss": 0.0143, | |
| "num_input_tokens_seen": 2303120, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 2.246178600160901, | |
| "grad_norm": 3.2762415409088135, | |
| "learning_rate": 2.9442274244006725e-06, | |
| "loss": 0.0194, | |
| "num_input_tokens_seen": 2309728, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 2.252614641995173, | |
| "grad_norm": 2.3237709999084473, | |
| "learning_rate": 2.9341204441673267e-06, | |
| "loss": 0.0051, | |
| "num_input_tokens_seen": 2316144, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.259050683829445, | |
| "grad_norm": 1.7801238298416138, | |
| "learning_rate": 2.924006142662632e-06, | |
| "loss": 0.0162, | |
| "num_input_tokens_seen": 2322768, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 2.265486725663717, | |
| "grad_norm": 4.876129150390625, | |
| "learning_rate": 2.913884690460325e-06, | |
| "loss": 0.0313, | |
| "num_input_tokens_seen": 2329312, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 2.2719227674979887, | |
| "grad_norm": 0.9637519717216492, | |
| "learning_rate": 2.903756258254734e-06, | |
| "loss": 0.0041, | |
| "num_input_tokens_seen": 2335824, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 2.2783588093322606, | |
| "grad_norm": 2.7481493949890137, | |
| "learning_rate": 2.8936210168579043e-06, | |
| "loss": 0.0321, | |
| "num_input_tokens_seen": 2342272, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 2.2847948511665326, | |
| "grad_norm": 1.682763934135437, | |
| "learning_rate": 2.883479137196714e-06, | |
| "loss": 0.0064, | |
| "num_input_tokens_seen": 2349056, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.2912308930008045, | |
| "grad_norm": 5.632142066955566, | |
| "learning_rate": 2.8733307903099926e-06, | |
| "loss": 0.0237, | |
| "num_input_tokens_seen": 2355552, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 2.2976669348350764, | |
| "grad_norm": 2.460470199584961, | |
| "learning_rate": 2.8631761473456377e-06, | |
| "loss": 0.0152, | |
| "num_input_tokens_seen": 2361808, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 2.3041029766693484, | |
| "grad_norm": 0.9998040199279785, | |
| "learning_rate": 2.853015379557729e-06, | |
| "loss": 0.0038, | |
| "num_input_tokens_seen": 2368288, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 2.3105390185036203, | |
| "grad_norm": 3.164407968521118, | |
| "learning_rate": 2.842848658303637e-06, | |
| "loss": 0.0168, | |
| "num_input_tokens_seen": 2374960, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 2.3169750603378922, | |
| "grad_norm": 2.3879611492156982, | |
| "learning_rate": 2.832676155041135e-06, | |
| "loss": 0.0049, | |
| "num_input_tokens_seen": 2381776, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.323411102172164, | |
| "grad_norm": 1.3164470195770264, | |
| "learning_rate": 2.822498041325509e-06, | |
| "loss": 0.0114, | |
| "num_input_tokens_seen": 2388112, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 2.329847144006436, | |
| "grad_norm": 2.3726656436920166, | |
| "learning_rate": 2.8123144888066623e-06, | |
| "loss": 0.022, | |
| "num_input_tokens_seen": 2394736, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 2.336283185840708, | |
| "grad_norm": 1.7789826393127441, | |
| "learning_rate": 2.802125669226222e-06, | |
| "loss": 0.0154, | |
| "num_input_tokens_seen": 2401248, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 2.34271922767498, | |
| "grad_norm": 3.68959641456604, | |
| "learning_rate": 2.7919317544146405e-06, | |
| "loss": 0.0204, | |
| "num_input_tokens_seen": 2407872, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 2.349155269509252, | |
| "grad_norm": 2.4927353858947754, | |
| "learning_rate": 2.7817329162883033e-06, | |
| "loss": 0.0334, | |
| "num_input_tokens_seen": 2414432, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.355591311343524, | |
| "grad_norm": 4.594964504241943, | |
| "learning_rate": 2.7715293268466204e-06, | |
| "loss": 0.0132, | |
| "num_input_tokens_seen": 2420848, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 2.3620273531777958, | |
| "grad_norm": 4.325422286987305, | |
| "learning_rate": 2.761321158169134e-06, | |
| "loss": 0.0291, | |
| "num_input_tokens_seen": 2427728, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 2.3684633950120677, | |
| "grad_norm": 2.46122407913208, | |
| "learning_rate": 2.7511085824126133e-06, | |
| "loss": 0.0089, | |
| "num_input_tokens_seen": 2434880, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 2.3748994368463396, | |
| "grad_norm": 2.729311227798462, | |
| "learning_rate": 2.74089177180815e-06, | |
| "loss": 0.0306, | |
| "num_input_tokens_seen": 2441168, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 2.3813354786806116, | |
| "grad_norm": 5.095163345336914, | |
| "learning_rate": 2.730670898658255e-06, | |
| "loss": 0.0297, | |
| "num_input_tokens_seen": 2447920, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.3877715205148835, | |
| "grad_norm": 1.902287483215332, | |
| "learning_rate": 2.7204461353339546e-06, | |
| "loss": 0.0247, | |
| "num_input_tokens_seen": 2454704, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 2.3942075623491554, | |
| "grad_norm": 3.267244577407837, | |
| "learning_rate": 2.7102176542718783e-06, | |
| "loss": 0.0234, | |
| "num_input_tokens_seen": 2461216, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.4006436041834274, | |
| "grad_norm": 4.101126670837402, | |
| "learning_rate": 2.699985627971354e-06, | |
| "loss": 0.0192, | |
| "num_input_tokens_seen": 2468032, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 2.4070796460176993, | |
| "grad_norm": 4.104948997497559, | |
| "learning_rate": 2.689750228991503e-06, | |
| "loss": 0.0324, | |
| "num_input_tokens_seen": 2474544, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.4135156878519712, | |
| "grad_norm": 2.1446776390075684, | |
| "learning_rate": 2.679511629948319e-06, | |
| "loss": 0.0332, | |
| "num_input_tokens_seen": 2481312, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.419951729686243, | |
| "grad_norm": 0.7457873225212097, | |
| "learning_rate": 2.669270003511769e-06, | |
| "loss": 0.0043, | |
| "num_input_tokens_seen": 2487888, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.426387771520515, | |
| "grad_norm": 2.1420276165008545, | |
| "learning_rate": 2.6590255224028725e-06, | |
| "loss": 0.0197, | |
| "num_input_tokens_seen": 2494784, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 2.432823813354787, | |
| "grad_norm": 3.0415239334106445, | |
| "learning_rate": 2.648778359390794e-06, | |
| "loss": 0.0366, | |
| "num_input_tokens_seen": 2501712, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.439259855189059, | |
| "grad_norm": 3.6502788066864014, | |
| "learning_rate": 2.638528687289925e-06, | |
| "loss": 0.0173, | |
| "num_input_tokens_seen": 2508592, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.445695897023331, | |
| "grad_norm": 2.2913506031036377, | |
| "learning_rate": 2.6282766789569742e-06, | |
| "loss": 0.0102, | |
| "num_input_tokens_seen": 2515216, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.4521319388576024, | |
| "grad_norm": 3.3507297039031982, | |
| "learning_rate": 2.618022507288049e-06, | |
| "loss": 0.0361, | |
| "num_input_tokens_seen": 2522064, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.4585679806918743, | |
| "grad_norm": 2.98098087310791, | |
| "learning_rate": 2.6077663452157398e-06, | |
| "loss": 0.0292, | |
| "num_input_tokens_seen": 2528608, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.4650040225261463, | |
| "grad_norm": 1.4962135553359985, | |
| "learning_rate": 2.5975083657062043e-06, | |
| "loss": 0.0095, | |
| "num_input_tokens_seen": 2535328, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.471440064360418, | |
| "grad_norm": 2.0819742679595947, | |
| "learning_rate": 2.587248741756253e-06, | |
| "loss": 0.015, | |
| "num_input_tokens_seen": 2542224, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.47787610619469, | |
| "grad_norm": 1.8906433582305908, | |
| "learning_rate": 2.576987646390426e-06, | |
| "loss": 0.0276, | |
| "num_input_tokens_seen": 2548976, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.484312148028962, | |
| "grad_norm": 2.451510190963745, | |
| "learning_rate": 2.566725252658081e-06, | |
| "loss": 0.0284, | |
| "num_input_tokens_seen": 2555568, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.490748189863234, | |
| "grad_norm": 3.7337939739227295, | |
| "learning_rate": 2.5564617336304703e-06, | |
| "loss": 0.0366, | |
| "num_input_tokens_seen": 2562128, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.497184231697506, | |
| "grad_norm": 1.6401593685150146, | |
| "learning_rate": 2.546197262397825e-06, | |
| "loss": 0.0322, | |
| "num_input_tokens_seen": 2568640, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.503620273531778, | |
| "grad_norm": 0.9136457443237305, | |
| "learning_rate": 2.535932012066434e-06, | |
| "loss": 0.0057, | |
| "num_input_tokens_seen": 2575024, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.51005631536605, | |
| "grad_norm": 1.119612455368042, | |
| "learning_rate": 2.525666155755725e-06, | |
| "loss": 0.0054, | |
| "num_input_tokens_seen": 2581520, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.5164923572003217, | |
| "grad_norm": 2.4770889282226562, | |
| "learning_rate": 2.515399866595347e-06, | |
| "loss": 0.0199, | |
| "num_input_tokens_seen": 2588528, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.5229283990345936, | |
| "grad_norm": 0.35335639119148254, | |
| "learning_rate": 2.5051333177222476e-06, | |
| "loss": 0.0045, | |
| "num_input_tokens_seen": 2594992, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.5293644408688656, | |
| "grad_norm": 2.8933093547821045, | |
| "learning_rate": 2.4948666822777536e-06, | |
| "loss": 0.0283, | |
| "num_input_tokens_seen": 2601568, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.5358004827031375, | |
| "grad_norm": 1.7032990455627441, | |
| "learning_rate": 2.4846001334046537e-06, | |
| "loss": 0.0248, | |
| "num_input_tokens_seen": 2608160, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.5422365245374094, | |
| "grad_norm": 1.9688091278076172, | |
| "learning_rate": 2.474333844244276e-06, | |
| "loss": 0.0132, | |
| "num_input_tokens_seen": 2614656, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.5486725663716814, | |
| "grad_norm": 3.135990619659424, | |
| "learning_rate": 2.464067987933567e-06, | |
| "loss": 0.04, | |
| "num_input_tokens_seen": 2621600, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.5551086082059533, | |
| "grad_norm": 0.7140212059020996, | |
| "learning_rate": 2.453802737602176e-06, | |
| "loss": 0.0029, | |
| "num_input_tokens_seen": 2627984, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.5615446500402252, | |
| "grad_norm": 3.9643640518188477, | |
| "learning_rate": 2.4435382663695305e-06, | |
| "loss": 0.0254, | |
| "num_input_tokens_seen": 2634720, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.567980691874497, | |
| "grad_norm": 2.284302234649658, | |
| "learning_rate": 2.4332747473419193e-06, | |
| "loss": 0.0108, | |
| "num_input_tokens_seen": 2641456, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.574416733708769, | |
| "grad_norm": 2.6400082111358643, | |
| "learning_rate": 2.4230123536095746e-06, | |
| "loss": 0.0269, | |
| "num_input_tokens_seen": 2647760, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.580852775543041, | |
| "grad_norm": 3.1969995498657227, | |
| "learning_rate": 2.4127512582437486e-06, | |
| "loss": 0.0111, | |
| "num_input_tokens_seen": 2654608, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.587288817377313, | |
| "grad_norm": 3.651118516921997, | |
| "learning_rate": 2.4024916342937966e-06, | |
| "loss": 0.0222, | |
| "num_input_tokens_seen": 2661072, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.593724859211585, | |
| "grad_norm": 2.1281003952026367, | |
| "learning_rate": 2.392233654784262e-06, | |
| "loss": 0.0101, | |
| "num_input_tokens_seen": 2667712, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.600160901045857, | |
| "grad_norm": 2.6782784461975098, | |
| "learning_rate": 2.3819774927119523e-06, | |
| "loss": 0.0138, | |
| "num_input_tokens_seen": 2674496, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.6065969428801288, | |
| "grad_norm": 2.2902138233184814, | |
| "learning_rate": 2.3717233210430258e-06, | |
| "loss": 0.0281, | |
| "num_input_tokens_seen": 2680816, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.6130329847144007, | |
| "grad_norm": 1.9150536060333252, | |
| "learning_rate": 2.3614713127100752e-06, | |
| "loss": 0.0042, | |
| "num_input_tokens_seen": 2687632, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.6194690265486726, | |
| "grad_norm": 0.3568836748600006, | |
| "learning_rate": 2.3512216406092066e-06, | |
| "loss": 0.0015, | |
| "num_input_tokens_seen": 2694464, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.6259050683829446, | |
| "grad_norm": 2.3506011962890625, | |
| "learning_rate": 2.340974477597128e-06, | |
| "loss": 0.0279, | |
| "num_input_tokens_seen": 2701344, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.6323411102172165, | |
| "grad_norm": 2.780200481414795, | |
| "learning_rate": 2.3307299964882314e-06, | |
| "loss": 0.0399, | |
| "num_input_tokens_seen": 2707536, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.6387771520514884, | |
| "grad_norm": 1.1793303489685059, | |
| "learning_rate": 2.3204883700516813e-06, | |
| "loss": 0.0074, | |
| "num_input_tokens_seen": 2714544, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.6452131938857604, | |
| "grad_norm": 1.7807022333145142, | |
| "learning_rate": 2.310249771008498e-06, | |
| "loss": 0.0078, | |
| "num_input_tokens_seen": 2721056, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.6516492357200323, | |
| "grad_norm": 12.764676094055176, | |
| "learning_rate": 2.3000143720286463e-06, | |
| "loss": 0.0406, | |
| "num_input_tokens_seen": 2727664, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.6580852775543042, | |
| "grad_norm": 0.44338610768318176, | |
| "learning_rate": 2.2897823457281225e-06, | |
| "loss": 0.0023, | |
| "num_input_tokens_seen": 2733600, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.664521319388576, | |
| "grad_norm": 3.5756232738494873, | |
| "learning_rate": 2.2795538646660462e-06, | |
| "loss": 0.006, | |
| "num_input_tokens_seen": 2740400, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.670957361222848, | |
| "grad_norm": 1.4301191568374634, | |
| "learning_rate": 2.269329101341745e-06, | |
| "loss": 0.0236, | |
| "num_input_tokens_seen": 2747248, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.67739340305712, | |
| "grad_norm": 2.0859804153442383, | |
| "learning_rate": 2.2591082281918507e-06, | |
| "loss": 0.0136, | |
| "num_input_tokens_seen": 2753776, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.6838294448913915, | |
| "grad_norm": 2.9704370498657227, | |
| "learning_rate": 2.2488914175873876e-06, | |
| "loss": 0.015, | |
| "num_input_tokens_seen": 2760720, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.6902654867256635, | |
| "grad_norm": 3.1178269386291504, | |
| "learning_rate": 2.238678841830867e-06, | |
| "loss": 0.0483, | |
| "num_input_tokens_seen": 2767136, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.6967015285599354, | |
| "grad_norm": 0.6049777269363403, | |
| "learning_rate": 2.2284706731533805e-06, | |
| "loss": 0.0014, | |
| "num_input_tokens_seen": 2773680, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.7031375703942073, | |
| "grad_norm": 3.5615270137786865, | |
| "learning_rate": 2.2182670837116975e-06, | |
| "loss": 0.0279, | |
| "num_input_tokens_seen": 2780160, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.7095736122284793, | |
| "grad_norm": 3.4241111278533936, | |
| "learning_rate": 2.20806824558536e-06, | |
| "loss": 0.0705, | |
| "num_input_tokens_seen": 2786912, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.716009654062751, | |
| "grad_norm": 1.0644826889038086, | |
| "learning_rate": 2.197874330773779e-06, | |
| "loss": 0.005, | |
| "num_input_tokens_seen": 2793888, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.722445695897023, | |
| "grad_norm": 5.071107387542725, | |
| "learning_rate": 2.1876855111933385e-06, | |
| "loss": 0.0453, | |
| "num_input_tokens_seen": 2800320, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.728881737731295, | |
| "grad_norm": 1.9479647874832153, | |
| "learning_rate": 2.1775019586744924e-06, | |
| "loss": 0.0095, | |
| "num_input_tokens_seen": 2807088, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.735317779565567, | |
| "grad_norm": 2.730952262878418, | |
| "learning_rate": 2.167323844958867e-06, | |
| "loss": 0.0095, | |
| "num_input_tokens_seen": 2813312, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.741753821399839, | |
| "grad_norm": 2.1456387042999268, | |
| "learning_rate": 2.1571513416963647e-06, | |
| "loss": 0.0138, | |
| "num_input_tokens_seen": 2819936, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.748189863234111, | |
| "grad_norm": 2.14911150932312, | |
| "learning_rate": 2.1469846204422724e-06, | |
| "loss": 0.0272, | |
| "num_input_tokens_seen": 2826224, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.754625905068383, | |
| "grad_norm": 0.5957837700843811, | |
| "learning_rate": 2.136823852654363e-06, | |
| "loss": 0.0026, | |
| "num_input_tokens_seen": 2832960, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.7610619469026547, | |
| "grad_norm": 0.3253982961177826, | |
| "learning_rate": 2.126669209690008e-06, | |
| "loss": 0.0016, | |
| "num_input_tokens_seen": 2839888, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.7674979887369267, | |
| "grad_norm": 3.472017765045166, | |
| "learning_rate": 2.1165208628032863e-06, | |
| "loss": 0.0633, | |
| "num_input_tokens_seen": 2846688, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.7739340305711986, | |
| "grad_norm": 2.029026985168457, | |
| "learning_rate": 2.1063789831420957e-06, | |
| "loss": 0.0191, | |
| "num_input_tokens_seen": 2853184, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.7803700724054705, | |
| "grad_norm": 2.316349506378174, | |
| "learning_rate": 2.096243741745266e-06, | |
| "loss": 0.0075, | |
| "num_input_tokens_seen": 2859632, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.7868061142397424, | |
| "grad_norm": 3.786245346069336, | |
| "learning_rate": 2.086115309539675e-06, | |
| "loss": 0.0371, | |
| "num_input_tokens_seen": 2865920, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.7932421560740144, | |
| "grad_norm": 1.864402413368225, | |
| "learning_rate": 2.0759938573373683e-06, | |
| "loss": 0.0275, | |
| "num_input_tokens_seen": 2872464, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.7996781979082863, | |
| "grad_norm": 8.142292022705078, | |
| "learning_rate": 2.0658795558326745e-06, | |
| "loss": 0.0441, | |
| "num_input_tokens_seen": 2879168, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.8061142397425582, | |
| "grad_norm": 1.3945283889770508, | |
| "learning_rate": 2.0557725755993283e-06, | |
| "loss": 0.0074, | |
| "num_input_tokens_seen": 2885520, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.81255028157683, | |
| "grad_norm": 1.951145887374878, | |
| "learning_rate": 2.0456730870875964e-06, | |
| "loss": 0.0412, | |
| "num_input_tokens_seen": 2892368, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.818986323411102, | |
| "grad_norm": 1.666693925857544, | |
| "learning_rate": 2.035581260621398e-06, | |
| "loss": 0.007, | |
| "num_input_tokens_seen": 2898640, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.825422365245374, | |
| "grad_norm": 0.8178473114967346, | |
| "learning_rate": 2.0254972663954356e-06, | |
| "loss": 0.0195, | |
| "num_input_tokens_seen": 2905312, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.831858407079646, | |
| "grad_norm": 2.1499900817871094, | |
| "learning_rate": 2.015421274472325e-06, | |
| "loss": 0.0104, | |
| "num_input_tokens_seen": 2911872, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.838294448913918, | |
| "grad_norm": 3.162245273590088, | |
| "learning_rate": 2.005353454779726e-06, | |
| "loss": 0.0196, | |
| "num_input_tokens_seen": 2918496, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.84473049074819, | |
| "grad_norm": 1.1920592784881592, | |
| "learning_rate": 1.995293977107475e-06, | |
| "loss": 0.0131, | |
| "num_input_tokens_seen": 2924944, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.8511665325824618, | |
| "grad_norm": 1.091436743736267, | |
| "learning_rate": 1.9852430111047254e-06, | |
| "loss": 0.0072, | |
| "num_input_tokens_seen": 2931440, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.8576025744167337, | |
| "grad_norm": 2.0469212532043457, | |
| "learning_rate": 1.9752007262770857e-06, | |
| "loss": 0.0058, | |
| "num_input_tokens_seen": 2938304, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.8640386162510056, | |
| "grad_norm": 1.6995850801467896, | |
| "learning_rate": 1.965167291983757e-06, | |
| "loss": 0.0242, | |
| "num_input_tokens_seen": 2945168, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.8704746580852776, | |
| "grad_norm": 5.4955735206604, | |
| "learning_rate": 1.955142877434681e-06, | |
| "loss": 0.0323, | |
| "num_input_tokens_seen": 2951952, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.8769106999195495, | |
| "grad_norm": 1.5203238725662231, | |
| "learning_rate": 1.9451276516876856e-06, | |
| "loss": 0.0186, | |
| "num_input_tokens_seen": 2958432, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.8833467417538214, | |
| "grad_norm": 1.398633599281311, | |
| "learning_rate": 1.9351217836456316e-06, | |
| "loss": 0.0071, | |
| "num_input_tokens_seen": 2965328, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.8897827835880934, | |
| "grad_norm": 1.4775344133377075, | |
| "learning_rate": 1.9251254420535665e-06, | |
| "loss": 0.0177, | |
| "num_input_tokens_seen": 2971872, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.8962188254223653, | |
| "grad_norm": 3.7046666145324707, | |
| "learning_rate": 1.9151387954958792e-06, | |
| "loss": 0.044, | |
| "num_input_tokens_seen": 2978784, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.9026548672566372, | |
| "grad_norm": 1.9969475269317627, | |
| "learning_rate": 1.9051620123934538e-06, | |
| "loss": 0.0119, | |
| "num_input_tokens_seen": 2985760, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.909090909090909, | |
| "grad_norm": 1.3861935138702393, | |
| "learning_rate": 1.895195261000831e-06, | |
| "loss": 0.0121, | |
| "num_input_tokens_seen": 2992352, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.915526950925181, | |
| "grad_norm": 2.0632236003875732, | |
| "learning_rate": 1.885238709403372e-06, | |
| "loss": 0.0319, | |
| "num_input_tokens_seen": 2998800, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.921962992759453, | |
| "grad_norm": 0.31324344873428345, | |
| "learning_rate": 1.8752925255144228e-06, | |
| "loss": 0.0024, | |
| "num_input_tokens_seen": 3005392, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.928399034593725, | |
| "grad_norm": 1.0096696615219116, | |
| "learning_rate": 1.8653568770724805e-06, | |
| "loss": 0.0102, | |
| "num_input_tokens_seen": 3012016, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.934835076427997, | |
| "grad_norm": 4.725823879241943, | |
| "learning_rate": 1.8554319316383657e-06, | |
| "loss": 0.0419, | |
| "num_input_tokens_seen": 3018768, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.941271118262269, | |
| "grad_norm": 1.6467297077178955, | |
| "learning_rate": 1.8455178565923993e-06, | |
| "loss": 0.0109, | |
| "num_input_tokens_seen": 3025328, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.9477071600965408, | |
| "grad_norm": 1.3065979480743408, | |
| "learning_rate": 1.8356148191315753e-06, | |
| "loss": 0.0092, | |
| "num_input_tokens_seen": 3032080, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.9541432019308127, | |
| "grad_norm": 2.6485443115234375, | |
| "learning_rate": 1.8257229862667437e-06, | |
| "loss": 0.0449, | |
| "num_input_tokens_seen": 3038880, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.9605792437650846, | |
| "grad_norm": 0.9736925363540649, | |
| "learning_rate": 1.8158425248197931e-06, | |
| "loss": 0.014, | |
| "num_input_tokens_seen": 3045552, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.9670152855993566, | |
| "grad_norm": 0.423833429813385, | |
| "learning_rate": 1.8059736014208388e-06, | |
| "loss": 0.0035, | |
| "num_input_tokens_seen": 3052288, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.9734513274336285, | |
| "grad_norm": 3.7729272842407227, | |
| "learning_rate": 1.7961163825054101e-06, | |
| "loss": 0.016, | |
| "num_input_tokens_seen": 3058768, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.9798873692679004, | |
| "grad_norm": 2.9312222003936768, | |
| "learning_rate": 1.7862710343116451e-06, | |
| "loss": 0.0151, | |
| "num_input_tokens_seen": 3065584, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.9863234111021724, | |
| "grad_norm": 0.6318484544754028, | |
| "learning_rate": 1.7764377228774877e-06, | |
| "loss": 0.0039, | |
| "num_input_tokens_seen": 3072368, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.9927594529364443, | |
| "grad_norm": 5.504857063293457, | |
| "learning_rate": 1.7666166140378853e-06, | |
| "loss": 0.0361, | |
| "num_input_tokens_seen": 3078864, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.9991954947707162, | |
| "grad_norm": 2.98315167427063, | |
| "learning_rate": 1.7568078734219934e-06, | |
| "loss": 0.0609, | |
| "num_input_tokens_seen": 3085664, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 3.0056315366049877, | |
| "grad_norm": 0.24189546704292297, | |
| "learning_rate": 1.747011666450384e-06, | |
| "loss": 0.0027, | |
| "num_input_tokens_seen": 3091568, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 3.0120675784392597, | |
| "grad_norm": 3.122098922729492, | |
| "learning_rate": 1.737228158332252e-06, | |
| "loss": 0.0097, | |
| "num_input_tokens_seen": 3098544, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 3.0185036202735316, | |
| "grad_norm": 2.117048740386963, | |
| "learning_rate": 1.7274575140626318e-06, | |
| "loss": 0.0091, | |
| "num_input_tokens_seen": 3105120, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 3.0249396621078035, | |
| "grad_norm": 0.3818783760070801, | |
| "learning_rate": 1.7176998984196148e-06, | |
| "loss": 0.0026, | |
| "num_input_tokens_seen": 3111552, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 3.0313757039420755, | |
| "grad_norm": 3.4925177097320557, | |
| "learning_rate": 1.7079554759615685e-06, | |
| "loss": 0.0311, | |
| "num_input_tokens_seen": 3118192, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 3.0378117457763474, | |
| "grad_norm": 0.1732572317123413, | |
| "learning_rate": 1.6982244110243626e-06, | |
| "loss": 0.0014, | |
| "num_input_tokens_seen": 3124640, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 3.0442477876106193, | |
| "grad_norm": 1.305844783782959, | |
| "learning_rate": 1.6885068677185989e-06, | |
| "loss": 0.0185, | |
| "num_input_tokens_seen": 3130992, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 3.0506838294448912, | |
| "grad_norm": 0.9071294665336609, | |
| "learning_rate": 1.678803009926841e-06, | |
| "loss": 0.0075, | |
| "num_input_tokens_seen": 3137696, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 3.057119871279163, | |
| "grad_norm": 0.9389513731002808, | |
| "learning_rate": 1.6691130013008514e-06, | |
| "loss": 0.0069, | |
| "num_input_tokens_seen": 3144560, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 3.063555913113435, | |
| "grad_norm": 0.15343110263347626, | |
| "learning_rate": 1.6594370052588328e-06, | |
| "loss": 0.0009, | |
| "num_input_tokens_seen": 3151072, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 3.069991954947707, | |
| "grad_norm": 0.5078912973403931, | |
| "learning_rate": 1.6497751849826692e-06, | |
| "loss": 0.0015, | |
| "num_input_tokens_seen": 3158016, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 3.076427996781979, | |
| "grad_norm": 0.14821191132068634, | |
| "learning_rate": 1.6401277034151798e-06, | |
| "loss": 0.0007, | |
| "num_input_tokens_seen": 3164560, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 3.082864038616251, | |
| "grad_norm": 0.3397853672504425, | |
| "learning_rate": 1.630494723257363e-06, | |
| "loss": 0.0012, | |
| "num_input_tokens_seen": 3171088, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 3.089300080450523, | |
| "grad_norm": 0.25013279914855957, | |
| "learning_rate": 1.620876406965658e-06, | |
| "loss": 0.0018, | |
| "num_input_tokens_seen": 3177952, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 3.0957361222847948, | |
| "grad_norm": 0.04799158126115799, | |
| "learning_rate": 1.611272916749205e-06, | |
| "loss": 0.0003, | |
| "num_input_tokens_seen": 3184592, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 3.1021721641190667, | |
| "grad_norm": 2.0195066928863525, | |
| "learning_rate": 1.6016844145671062e-06, | |
| "loss": 0.0044, | |
| "num_input_tokens_seen": 3190896, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 3.1086082059533386, | |
| "grad_norm": 0.6244819164276123, | |
| "learning_rate": 1.5921110621256972e-06, | |
| "loss": 0.0019, | |
| "num_input_tokens_seen": 3197376, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 3.1150442477876106, | |
| "grad_norm": 2.540050506591797, | |
| "learning_rate": 1.58255302087582e-06, | |
| "loss": 0.0059, | |
| "num_input_tokens_seen": 3203776, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 3.1214802896218825, | |
| "grad_norm": 0.7487736344337463, | |
| "learning_rate": 1.5730104520100984e-06, | |
| "loss": 0.0036, | |
| "num_input_tokens_seen": 3210464, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 3.1279163314561544, | |
| "grad_norm": 0.052535440772771835, | |
| "learning_rate": 1.56348351646022e-06, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 3217056, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 3.1343523732904264, | |
| "grad_norm": 2.5393643379211426, | |
| "learning_rate": 1.5539723748942246e-06, | |
| "loss": 0.0019, | |
| "num_input_tokens_seen": 3223840, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 3.1407884151246983, | |
| "grad_norm": 0.28790536522865295, | |
| "learning_rate": 1.544477187713791e-06, | |
| "loss": 0.0009, | |
| "num_input_tokens_seen": 3230592, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 3.1472244569589702, | |
| "grad_norm": 2.5697410106658936, | |
| "learning_rate": 1.534998115051533e-06, | |
| "loss": 0.0318, | |
| "num_input_tokens_seen": 3237216, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 3.153660498793242, | |
| "grad_norm": 1.5203006267547607, | |
| "learning_rate": 1.5255353167683017e-06, | |
| "loss": 0.0216, | |
| "num_input_tokens_seen": 3243920, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 3.160096540627514, | |
| "grad_norm": 0.1484091877937317, | |
| "learning_rate": 1.5160889524504857e-06, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 3250656, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 3.166532582461786, | |
| "grad_norm": 3.3526744842529297, | |
| "learning_rate": 1.50665918140732e-06, | |
| "loss": 0.0286, | |
| "num_input_tokens_seen": 3257312, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 3.172968624296058, | |
| "grad_norm": 1.3879235982894897, | |
| "learning_rate": 1.4972461626682033e-06, | |
| "loss": 0.0254, | |
| "num_input_tokens_seen": 3264112, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 3.17940466613033, | |
| "grad_norm": 1.1939952373504639, | |
| "learning_rate": 1.4878500549800115e-06, | |
| "loss": 0.0039, | |
| "num_input_tokens_seen": 3270528, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 3.185840707964602, | |
| "grad_norm": 0.20248474180698395, | |
| "learning_rate": 1.4784710168044215e-06, | |
| "loss": 0.0005, | |
| "num_input_tokens_seen": 3277008, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 3.1922767497988738, | |
| "grad_norm": 1.903956413269043, | |
| "learning_rate": 1.4691092063152417e-06, | |
| "loss": 0.0196, | |
| "num_input_tokens_seen": 3283376, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 3.1987127916331457, | |
| "grad_norm": 0.3746008276939392, | |
| "learning_rate": 1.459764781395741e-06, | |
| "loss": 0.0015, | |
| "num_input_tokens_seen": 3289664, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 3.2051488334674176, | |
| "grad_norm": 4.635190486907959, | |
| "learning_rate": 1.4504378996359867e-06, | |
| "loss": 0.0088, | |
| "num_input_tokens_seen": 3296576, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 3.2115848753016896, | |
| "grad_norm": 1.4451507329940796, | |
| "learning_rate": 1.4411287183301902e-06, | |
| "loss": 0.0023, | |
| "num_input_tokens_seen": 3303120, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 3.2180209171359615, | |
| "grad_norm": 1.252470850944519, | |
| "learning_rate": 1.4318373944740485e-06, | |
| "loss": 0.0071, | |
| "num_input_tokens_seen": 3310384, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.2244569589702334, | |
| "grad_norm": 0.6509237289428711, | |
| "learning_rate": 1.4225640847621006e-06, | |
| "loss": 0.0006, | |
| "num_input_tokens_seen": 3316768, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 3.2308930008045054, | |
| "grad_norm": 0.2248382717370987, | |
| "learning_rate": 1.4133089455850878e-06, | |
| "loss": 0.0011, | |
| "num_input_tokens_seen": 3323488, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 3.2373290426387773, | |
| "grad_norm": 1.0306220054626465, | |
| "learning_rate": 1.4040721330273063e-06, | |
| "loss": 0.0057, | |
| "num_input_tokens_seen": 3330000, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 3.2437650844730492, | |
| "grad_norm": 0.1734343320131302, | |
| "learning_rate": 1.3948538028639851e-06, | |
| "loss": 0.0006, | |
| "num_input_tokens_seen": 3336592, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 3.250201126307321, | |
| "grad_norm": 0.9872696399688721, | |
| "learning_rate": 1.3856541105586545e-06, | |
| "loss": 0.0066, | |
| "num_input_tokens_seen": 3343136, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 3.256637168141593, | |
| "grad_norm": 0.8048367500305176, | |
| "learning_rate": 1.3764732112605223e-06, | |
| "loss": 0.0079, | |
| "num_input_tokens_seen": 3349680, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 3.263073209975865, | |
| "grad_norm": 1.8275296688079834, | |
| "learning_rate": 1.367311259801863e-06, | |
| "loss": 0.0215, | |
| "num_input_tokens_seen": 3356304, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 3.2695092518101365, | |
| "grad_norm": 1.90727698802948, | |
| "learning_rate": 1.3581684106953987e-06, | |
| "loss": 0.0031, | |
| "num_input_tokens_seen": 3363008, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 3.2759452936444085, | |
| "grad_norm": 2.614037275314331, | |
| "learning_rate": 1.3490448181317025e-06, | |
| "loss": 0.0024, | |
| "num_input_tokens_seen": 3369728, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 3.2823813354786804, | |
| "grad_norm": 1.9239071607589722, | |
| "learning_rate": 1.3399406359765921e-06, | |
| "loss": 0.0094, | |
| "num_input_tokens_seen": 3375968, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 3.2888173773129523, | |
| "grad_norm": 1.1601731777191162, | |
| "learning_rate": 1.3308560177685334e-06, | |
| "loss": 0.0054, | |
| "num_input_tokens_seen": 3383024, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 3.2952534191472242, | |
| "grad_norm": 0.31424281001091003, | |
| "learning_rate": 1.3217911167160575e-06, | |
| "loss": 0.0008, | |
| "num_input_tokens_seen": 3389488, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 3.301689460981496, | |
| "grad_norm": 2.633910655975342, | |
| "learning_rate": 1.3127460856951724e-06, | |
| "loss": 0.0053, | |
| "num_input_tokens_seen": 3395712, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 3.308125502815768, | |
| "grad_norm": 0.9618326425552368, | |
| "learning_rate": 1.303721077246784e-06, | |
| "loss": 0.006, | |
| "num_input_tokens_seen": 3402384, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 3.31456154465004, | |
| "grad_norm": 0.22136647999286652, | |
| "learning_rate": 1.2947162435741278e-06, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 3409136, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 3.320997586484312, | |
| "grad_norm": 1.880077838897705, | |
| "learning_rate": 1.2857317365401997e-06, | |
| "loss": 0.0135, | |
| "num_input_tokens_seen": 3415776, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 3.327433628318584, | |
| "grad_norm": 2.234178304672241, | |
| "learning_rate": 1.2767677076651913e-06, | |
| "loss": 0.0083, | |
| "num_input_tokens_seen": 3422496, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 3.333869670152856, | |
| "grad_norm": 0.21132518351078033, | |
| "learning_rate": 1.2678243081239421e-06, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 3429312, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 3.340305711987128, | |
| "grad_norm": 1.0334022045135498, | |
| "learning_rate": 1.2589016887433846e-06, | |
| "loss": 0.0038, | |
| "num_input_tokens_seen": 3435840, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 3.3467417538213997, | |
| "grad_norm": 2.751037359237671, | |
| "learning_rate": 1.2500000000000007e-06, | |
| "loss": 0.0269, | |
| "num_input_tokens_seen": 3442176, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 3.3531777956556716, | |
| "grad_norm": 0.7970973253250122, | |
| "learning_rate": 1.2411193920172866e-06, | |
| "loss": 0.0062, | |
| "num_input_tokens_seen": 3448784, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 3.3596138374899436, | |
| "grad_norm": 0.09952107071876526, | |
| "learning_rate": 1.2322600145632204e-06, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 3455184, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 3.3660498793242155, | |
| "grad_norm": 0.6218022108078003, | |
| "learning_rate": 1.2234220170477332e-06, | |
| "loss": 0.0011, | |
| "num_input_tokens_seen": 3461792, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 3.3724859211584874, | |
| "grad_norm": 1.4417766332626343, | |
| "learning_rate": 1.2146055485201943e-06, | |
| "loss": 0.0026, | |
| "num_input_tokens_seen": 3468624, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 3.3789219629927594, | |
| "grad_norm": 2.819247245788574, | |
| "learning_rate": 1.205810757666894e-06, | |
| "loss": 0.0183, | |
| "num_input_tokens_seen": 3474976, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 3.3853580048270313, | |
| "grad_norm": 1.7066518068313599, | |
| "learning_rate": 1.1970377928085372e-06, | |
| "loss": 0.0079, | |
| "num_input_tokens_seen": 3481360, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 3.3917940466613032, | |
| "grad_norm": 2.671914577484131, | |
| "learning_rate": 1.188286801897743e-06, | |
| "loss": 0.0123, | |
| "num_input_tokens_seen": 3487904, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 3.398230088495575, | |
| "grad_norm": 0.37451621890068054, | |
| "learning_rate": 1.1795579325165448e-06, | |
| "loss": 0.0018, | |
| "num_input_tokens_seen": 3494368, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 3.404666130329847, | |
| "grad_norm": 0.3565497398376465, | |
| "learning_rate": 1.1708513318739096e-06, | |
| "loss": 0.0014, | |
| "num_input_tokens_seen": 3500704, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 3.411102172164119, | |
| "grad_norm": 0.22408631443977356, | |
| "learning_rate": 1.1621671468032495e-06, | |
| "loss": 0.0009, | |
| "num_input_tokens_seen": 3507216, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 3.417538213998391, | |
| "grad_norm": 1.3339484930038452, | |
| "learning_rate": 1.153505523759944e-06, | |
| "loss": 0.0078, | |
| "num_input_tokens_seen": 3513664, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 3.423974255832663, | |
| "grad_norm": 0.6494855880737305, | |
| "learning_rate": 1.1448666088188766e-06, | |
| "loss": 0.0027, | |
| "num_input_tokens_seen": 3520096, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 3.430410297666935, | |
| "grad_norm": 0.10890411585569382, | |
| "learning_rate": 1.1362505476719662e-06, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 3526560, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 3.4368463395012068, | |
| "grad_norm": 13.174049377441406, | |
| "learning_rate": 1.1276574856257097e-06, | |
| "loss": 0.0064, | |
| "num_input_tokens_seen": 3533536, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 3.4432823813354787, | |
| "grad_norm": 2.0001068115234375, | |
| "learning_rate": 1.1190875675987355e-06, | |
| "loss": 0.007, | |
| "num_input_tokens_seen": 3540288, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 3.4497184231697506, | |
| "grad_norm": 0.15650025010108948, | |
| "learning_rate": 1.1105409381193572e-06, | |
| "loss": 0.0005, | |
| "num_input_tokens_seen": 3546720, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 3.4561544650040226, | |
| "grad_norm": 0.13460475206375122, | |
| "learning_rate": 1.1020177413231334e-06, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 3553280, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 3.4625905068382945, | |
| "grad_norm": 2.165956735610962, | |
| "learning_rate": 1.0935181209504422e-06, | |
| "loss": 0.0294, | |
| "num_input_tokens_seen": 3559776, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 3.4690265486725664, | |
| "grad_norm": 0.6856318712234497, | |
| "learning_rate": 1.0850422203440555e-06, | |
| "loss": 0.0036, | |
| "num_input_tokens_seen": 3566848, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 3.4754625905068384, | |
| "grad_norm": 1.306766152381897, | |
| "learning_rate": 1.0765901824467167e-06, | |
| "loss": 0.0051, | |
| "num_input_tokens_seen": 3573280, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 3.4818986323411103, | |
| "grad_norm": 0.3889179825782776, | |
| "learning_rate": 1.068162149798737e-06, | |
| "loss": 0.0012, | |
| "num_input_tokens_seen": 3579712, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 3.4883346741753822, | |
| "grad_norm": 1.5245965719223022, | |
| "learning_rate": 1.0597582645355891e-06, | |
| "loss": 0.0231, | |
| "num_input_tokens_seen": 3586480, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 3.494770716009654, | |
| "grad_norm": 0.6708037257194519, | |
| "learning_rate": 1.0513786683855062e-06, | |
| "loss": 0.0041, | |
| "num_input_tokens_seen": 3593136, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 3.501206757843926, | |
| "grad_norm": 2.0138630867004395, | |
| "learning_rate": 1.0430235026670979e-06, | |
| "loss": 0.0124, | |
| "num_input_tokens_seen": 3599968, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 3.507642799678198, | |
| "grad_norm": 7.274059295654297, | |
| "learning_rate": 1.034692908286964e-06, | |
| "loss": 0.0171, | |
| "num_input_tokens_seen": 3606592, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 3.51407884151247, | |
| "grad_norm": 5.609940052032471, | |
| "learning_rate": 1.0263870257373162e-06, | |
| "loss": 0.008, | |
| "num_input_tokens_seen": 3613072, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 3.520514883346742, | |
| "grad_norm": 1.4191588163375854, | |
| "learning_rate": 1.0181059950936131e-06, | |
| "loss": 0.0035, | |
| "num_input_tokens_seen": 3619696, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 3.526950925181014, | |
| "grad_norm": 0.1580982804298401, | |
| "learning_rate": 1.0098499560121943e-06, | |
| "loss": 0.0006, | |
| "num_input_tokens_seen": 3626240, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 3.5333869670152858, | |
| "grad_norm": 0.637765109539032, | |
| "learning_rate": 1.0016190477279274e-06, | |
| "loss": 0.002, | |
| "num_input_tokens_seen": 3632704, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 3.5398230088495577, | |
| "grad_norm": 0.07971790432929993, | |
| "learning_rate": 9.934134090518593e-07, | |
| "loss": 0.0003, | |
| "num_input_tokens_seen": 3639360, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 3.5462590506838296, | |
| "grad_norm": 0.15312433242797852, | |
| "learning_rate": 9.852331783688722e-07, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 3646112, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 3.5526950925181016, | |
| "grad_norm": 0.5353730916976929, | |
| "learning_rate": 9.770784936353555e-07, | |
| "loss": 0.0016, | |
| "num_input_tokens_seen": 3652704, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 3.5591311343523735, | |
| "grad_norm": 0.3197666108608246, | |
| "learning_rate": 9.689494923768756e-07, | |
| "loss": 0.0012, | |
| "num_input_tokens_seen": 3659696, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 3.5655671761866454, | |
| "grad_norm": 1.4529962539672852, | |
| "learning_rate": 9.608463116858544e-07, | |
| "loss": 0.0057, | |
| "num_input_tokens_seen": 3666288, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 3.5720032180209174, | |
| "grad_norm": 2.7501587867736816, | |
| "learning_rate": 9.527690882192636e-07, | |
| "loss": 0.0168, | |
| "num_input_tokens_seen": 3673104, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 3.5784392598551893, | |
| "grad_norm": 0.21036742627620697, | |
| "learning_rate": 9.447179581963156e-07, | |
| "loss": 0.0012, | |
| "num_input_tokens_seen": 3679872, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 3.5848753016894612, | |
| "grad_norm": 0.03335335850715637, | |
| "learning_rate": 9.366930573961649e-07, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 3686288, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 3.591311343523733, | |
| "grad_norm": 1.3189131021499634, | |
| "learning_rate": 9.286945211556231e-07, | |
| "loss": 0.0025, | |
| "num_input_tokens_seen": 3692976, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 3.597747385358005, | |
| "grad_norm": 1.1787400245666504, | |
| "learning_rate": 9.207224843668733e-07, | |
| "loss": 0.0194, | |
| "num_input_tokens_seen": 3699312, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 3.604183427192277, | |
| "grad_norm": 0.9992094039916992, | |
| "learning_rate": 9.127770814751933e-07, | |
| "loss": 0.0055, | |
| "num_input_tokens_seen": 3705888, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 3.6106194690265485, | |
| "grad_norm": 2.264843702316284, | |
| "learning_rate": 9.048584464766938e-07, | |
| "loss": 0.0215, | |
| "num_input_tokens_seen": 3712688, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 3.6170555108608204, | |
| "grad_norm": 0.06527237594127655, | |
| "learning_rate": 8.969667129160547e-07, | |
| "loss": 0.0003, | |
| "num_input_tokens_seen": 3719168, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 3.6234915526950924, | |
| "grad_norm": 3.79392409324646, | |
| "learning_rate": 8.891020138842718e-07, | |
| "loss": 0.0242, | |
| "num_input_tokens_seen": 3726048, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 3.6299275945293643, | |
| "grad_norm": 0.9232211112976074, | |
| "learning_rate": 8.81264482016416e-07, | |
| "loss": 0.0206, | |
| "num_input_tokens_seen": 3732672, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 3.6363636363636362, | |
| "grad_norm": 0.5276843309402466, | |
| "learning_rate": 8.734542494893955e-07, | |
| "loss": 0.0024, | |
| "num_input_tokens_seen": 3739456, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 3.642799678197908, | |
| "grad_norm": 1.1676807403564453, | |
| "learning_rate": 8.65671448019722e-07, | |
| "loss": 0.0087, | |
| "num_input_tokens_seen": 3746160, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 3.64923572003218, | |
| "grad_norm": 1.3703765869140625, | |
| "learning_rate": 8.579162088612974e-07, | |
| "loss": 0.0089, | |
| "num_input_tokens_seen": 3752560, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 3.655671761866452, | |
| "grad_norm": 0.06538532674312592, | |
| "learning_rate": 8.501886628031941e-07, | |
| "loss": 0.0003, | |
| "num_input_tokens_seen": 3759600, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 3.662107803700724, | |
| "grad_norm": 0.0386020764708519, | |
| "learning_rate": 8.424889401674505e-07, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 3766096, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 3.668543845534996, | |
| "grad_norm": 0.20554865896701813, | |
| "learning_rate": 8.348171708068748e-07, | |
| "loss": 0.0009, | |
| "num_input_tokens_seen": 3772944, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 3.674979887369268, | |
| "grad_norm": 0.9973205327987671, | |
| "learning_rate": 8.271734841028553e-07, | |
| "loss": 0.0154, | |
| "num_input_tokens_seen": 3779664, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 3.6814159292035398, | |
| "grad_norm": 0.30160781741142273, | |
| "learning_rate": 8.195580089631733e-07, | |
| "loss": 0.0012, | |
| "num_input_tokens_seen": 3786080, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 3.6878519710378117, | |
| "grad_norm": 0.49049124121665955, | |
| "learning_rate": 8.119708738198395e-07, | |
| "loss": 0.0008, | |
| "num_input_tokens_seen": 3792768, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 3.6942880128720836, | |
| "grad_norm": 1.6590077877044678, | |
| "learning_rate": 8.04412206626915e-07, | |
| "loss": 0.0081, | |
| "num_input_tokens_seen": 3799472, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 3.7007240547063556, | |
| "grad_norm": 1.814943552017212, | |
| "learning_rate": 7.968821348583644e-07, | |
| "loss": 0.008, | |
| "num_input_tokens_seen": 3805984, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 3.7071600965406275, | |
| "grad_norm": 1.6639471054077148, | |
| "learning_rate": 7.89380785505901e-07, | |
| "loss": 0.0073, | |
| "num_input_tokens_seen": 3813088, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 3.7135961383748994, | |
| "grad_norm": 0.946050763130188, | |
| "learning_rate": 7.819082850768433e-07, | |
| "loss": 0.0062, | |
| "num_input_tokens_seen": 3820208, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 3.7200321802091714, | |
| "grad_norm": 0.2189425230026245, | |
| "learning_rate": 7.744647595919869e-07, | |
| "loss": 0.0015, | |
| "num_input_tokens_seen": 3826800, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 3.7264682220434433, | |
| "grad_norm": 2.796231985092163, | |
| "learning_rate": 7.670503345834757e-07, | |
| "loss": 0.0268, | |
| "num_input_tokens_seen": 3833344, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 3.7329042638777152, | |
| "grad_norm": 0.13711552321910858, | |
| "learning_rate": 7.596651350926837e-07, | |
| "loss": 0.0007, | |
| "num_input_tokens_seen": 3839920, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 3.739340305711987, | |
| "grad_norm": 0.3616367280483246, | |
| "learning_rate": 7.523092856681099e-07, | |
| "loss": 0.0016, | |
| "num_input_tokens_seen": 3846432, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 3.745776347546259, | |
| "grad_norm": 2.3357245922088623, | |
| "learning_rate": 7.44982910363276e-07, | |
| "loss": 0.0631, | |
| "num_input_tokens_seen": 3853216, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 3.752212389380531, | |
| "grad_norm": 1.7805283069610596, | |
| "learning_rate": 7.376861327346325e-07, | |
| "loss": 0.0128, | |
| "num_input_tokens_seen": 3859664, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 3.758648431214803, | |
| "grad_norm": 0.5933414101600647, | |
| "learning_rate": 7.304190758394775e-07, | |
| "loss": 0.0034, | |
| "num_input_tokens_seen": 3866208, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 3.765084473049075, | |
| "grad_norm": 3.1310431957244873, | |
| "learning_rate": 7.231818622338824e-07, | |
| "loss": 0.0633, | |
| "num_input_tokens_seen": 3872736, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 3.771520514883347, | |
| "grad_norm": 0.2022082656621933, | |
| "learning_rate": 7.159746139706194e-07, | |
| "loss": 0.0007, | |
| "num_input_tokens_seen": 3879264, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 3.7779565567176188, | |
| "grad_norm": 6.451120376586914, | |
| "learning_rate": 7.087974525971103e-07, | |
| "loss": 0.0211, | |
| "num_input_tokens_seen": 3885744, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 3.7843925985518907, | |
| "grad_norm": 0.8931072354316711, | |
| "learning_rate": 7.016504991533727e-07, | |
| "loss": 0.009, | |
| "num_input_tokens_seen": 3892304, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 3.7908286403861626, | |
| "grad_norm": 1.4347479343414307, | |
| "learning_rate": 6.94533874169977e-07, | |
| "loss": 0.0152, | |
| "num_input_tokens_seen": 3898768, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 3.7972646822204346, | |
| "grad_norm": 0.5323463678359985, | |
| "learning_rate": 6.874476976660185e-07, | |
| "loss": 0.0022, | |
| "num_input_tokens_seen": 3904976, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 3.8037007240547065, | |
| "grad_norm": 1.120011806488037, | |
| "learning_rate": 6.803920891470905e-07, | |
| "loss": 0.014, | |
| "num_input_tokens_seen": 3911360, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 3.8101367658889784, | |
| "grad_norm": 0.6292040348052979, | |
| "learning_rate": 6.733671676032674e-07, | |
| "loss": 0.0085, | |
| "num_input_tokens_seen": 3918224, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 3.8165728077232504, | |
| "grad_norm": 3.3647360801696777, | |
| "learning_rate": 6.663730515071019e-07, | |
| "loss": 0.0161, | |
| "num_input_tokens_seen": 3924960, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 3.823008849557522, | |
| "grad_norm": 1.8465656042099, | |
| "learning_rate": 6.594098588116243e-07, | |
| "loss": 0.0234, | |
| "num_input_tokens_seen": 3931712, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 3.829444891391794, | |
| "grad_norm": 1.0739251375198364, | |
| "learning_rate": 6.524777069483526e-07, | |
| "loss": 0.0186, | |
| "num_input_tokens_seen": 3938304, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 3.8358809332260657, | |
| "grad_norm": 3.146777629852295, | |
| "learning_rate": 6.455767128253148e-07, | |
| "loss": 0.0199, | |
| "num_input_tokens_seen": 3945200, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 3.8423169750603376, | |
| "grad_norm": 1.1694271564483643, | |
| "learning_rate": 6.38706992825075e-07, | |
| "loss": 0.0052, | |
| "num_input_tokens_seen": 3951808, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 3.8487530168946096, | |
| "grad_norm": 1.1547743082046509, | |
| "learning_rate": 6.318686628027723e-07, | |
| "loss": 0.0165, | |
| "num_input_tokens_seen": 3958480, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 3.8551890587288815, | |
| "grad_norm": 1.1595410108566284, | |
| "learning_rate": 6.250618380841661e-07, | |
| "loss": 0.01, | |
| "num_input_tokens_seen": 3965072, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 3.8616251005631534, | |
| "grad_norm": 0.8452915549278259, | |
| "learning_rate": 6.182866334636889e-07, | |
| "loss": 0.0047, | |
| "num_input_tokens_seen": 3971808, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.8680611423974254, | |
| "grad_norm": 2.201892375946045, | |
| "learning_rate": 6.115431632025154e-07, | |
| "loss": 0.0039, | |
| "num_input_tokens_seen": 3978480, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 3.8744971842316973, | |
| "grad_norm": 0.24013373255729675, | |
| "learning_rate": 6.048315410266326e-07, | |
| "loss": 0.0022, | |
| "num_input_tokens_seen": 3985216, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 3.8809332260659692, | |
| "grad_norm": 0.442757248878479, | |
| "learning_rate": 5.981518801249192e-07, | |
| "loss": 0.0042, | |
| "num_input_tokens_seen": 3991792, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 3.887369267900241, | |
| "grad_norm": 2.5312795639038086, | |
| "learning_rate": 5.915042931472426e-07, | |
| "loss": 0.0076, | |
| "num_input_tokens_seen": 3998224, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 3.893805309734513, | |
| "grad_norm": 0.3599741756916046, | |
| "learning_rate": 5.848888922025553e-07, | |
| "loss": 0.0019, | |
| "num_input_tokens_seen": 4004960, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 3.900241351568785, | |
| "grad_norm": 0.33045250177383423, | |
| "learning_rate": 5.783057888570034e-07, | |
| "loss": 0.0014, | |
| "num_input_tokens_seen": 4011984, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 3.906677393403057, | |
| "grad_norm": 0.540598452091217, | |
| "learning_rate": 5.717550941320482e-07, | |
| "loss": 0.0022, | |
| "num_input_tokens_seen": 4018912, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 3.913113435237329, | |
| "grad_norm": 0.4901201128959656, | |
| "learning_rate": 5.65236918502593e-07, | |
| "loss": 0.0024, | |
| "num_input_tokens_seen": 4025504, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 3.919549477071601, | |
| "grad_norm": 0.23451536893844604, | |
| "learning_rate": 5.587513718951165e-07, | |
| "loss": 0.0013, | |
| "num_input_tokens_seen": 4031776, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 3.9259855189058728, | |
| "grad_norm": 0.9038437604904175, | |
| "learning_rate": 5.522985636858238e-07, | |
| "loss": 0.0064, | |
| "num_input_tokens_seen": 4038208, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 3.9324215607401447, | |
| "grad_norm": 1.4877148866653442, | |
| "learning_rate": 5.458786026988005e-07, | |
| "loss": 0.0084, | |
| "num_input_tokens_seen": 4044928, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 3.9388576025744166, | |
| "grad_norm": 0.12848466634750366, | |
| "learning_rate": 5.394915972041739e-07, | |
| "loss": 0.0009, | |
| "num_input_tokens_seen": 4051552, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 3.9452936444086886, | |
| "grad_norm": 0.22914128005504608, | |
| "learning_rate": 5.33137654916292e-07, | |
| "loss": 0.001, | |
| "num_input_tokens_seen": 4058304, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 3.9517296862429605, | |
| "grad_norm": 0.7593125700950623, | |
| "learning_rate": 5.268168829919046e-07, | |
| "loss": 0.0064, | |
| "num_input_tokens_seen": 4064720, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 3.9581657280772324, | |
| "grad_norm": 0.6085631251335144, | |
| "learning_rate": 5.205293880283552e-07, | |
| "loss": 0.0033, | |
| "num_input_tokens_seen": 4071216, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 3.9646017699115044, | |
| "grad_norm": 0.9351167678833008, | |
| "learning_rate": 5.14275276061785e-07, | |
| "loss": 0.0065, | |
| "num_input_tokens_seen": 4077904, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 3.9710378117457763, | |
| "grad_norm": 2.1718461513519287, | |
| "learning_rate": 5.080546525653448e-07, | |
| "loss": 0.0272, | |
| "num_input_tokens_seen": 4084656, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 3.9774738535800482, | |
| "grad_norm": 0.39000532031059265, | |
| "learning_rate": 5.018676224474139e-07, | |
| "loss": 0.0015, | |
| "num_input_tokens_seen": 4091584, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 3.98390989541432, | |
| "grad_norm": 0.5723803639411926, | |
| "learning_rate": 4.957142900498335e-07, | |
| "loss": 0.0013, | |
| "num_input_tokens_seen": 4098768, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 3.990345937248592, | |
| "grad_norm": 1.5266039371490479, | |
| "learning_rate": 4.895947591461456e-07, | |
| "loss": 0.0148, | |
| "num_input_tokens_seen": 4105312, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 3.996781979082864, | |
| "grad_norm": 0.7928001880645752, | |
| "learning_rate": 4.835091329398436e-07, | |
| "loss": 0.0063, | |
| "num_input_tokens_seen": 4112000, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 4.003218020917136, | |
| "grad_norm": 0.09017051756381989, | |
| "learning_rate": 4.774575140626317e-07, | |
| "loss": 0.0003, | |
| "num_input_tokens_seen": 4118624, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 4.009654062751408, | |
| "grad_norm": 0.2493676394224167, | |
| "learning_rate": 4.714400045726919e-07, | |
| "loss": 0.001, | |
| "num_input_tokens_seen": 4125408, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 4.01609010458568, | |
| "grad_norm": 0.03381378576159477, | |
| "learning_rate": 4.6545670595296686e-07, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4131936, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 4.022526146419952, | |
| "grad_norm": 2.845327854156494, | |
| "learning_rate": 4.5950771910944603e-07, | |
| "loss": 0.0193, | |
| "num_input_tokens_seen": 4138352, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 4.028962188254224, | |
| "grad_norm": 0.6973279714584351, | |
| "learning_rate": 4.5359314436946275e-07, | |
| "loss": 0.0049, | |
| "num_input_tokens_seen": 4144672, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 4.035398230088496, | |
| "grad_norm": 0.3552819788455963, | |
| "learning_rate": 4.4771308148000487e-07, | |
| "loss": 0.0015, | |
| "num_input_tokens_seen": 4151296, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 4.041834271922768, | |
| "grad_norm": 0.2976234555244446, | |
| "learning_rate": 4.418676296060323e-07, | |
| "loss": 0.0019, | |
| "num_input_tokens_seen": 4157696, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 4.0482703137570395, | |
| "grad_norm": 0.6438854932785034, | |
| "learning_rate": 4.3605688732880097e-07, | |
| "loss": 0.0034, | |
| "num_input_tokens_seen": 4164352, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 4.054706355591311, | |
| "grad_norm": 0.055070556700229645, | |
| "learning_rate": 4.302809526442053e-07, | |
| "loss": 0.0003, | |
| "num_input_tokens_seen": 4170992, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 4.061142397425583, | |
| "grad_norm": 0.5393857359886169, | |
| "learning_rate": 4.2453992296112384e-07, | |
| "loss": 0.0031, | |
| "num_input_tokens_seen": 4177888, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 4.067578439259855, | |
| "grad_norm": 0.10041255503892899, | |
| "learning_rate": 4.188338950997728e-07, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 4184800, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 4.074014481094127, | |
| "grad_norm": 0.4824787378311157, | |
| "learning_rate": 4.1316296529007955e-07, | |
| "loss": 0.0027, | |
| "num_input_tokens_seen": 4191136, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 4.080450522928399, | |
| "grad_norm": 0.8842573761940002, | |
| "learning_rate": 4.075272291700558e-07, | |
| "loss": 0.0047, | |
| "num_input_tokens_seen": 4197984, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 4.086886564762671, | |
| "grad_norm": 0.0672411248087883, | |
| "learning_rate": 4.019267817841835e-07, | |
| "loss": 0.0003, | |
| "num_input_tokens_seen": 4204688, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 4.093322606596943, | |
| "grad_norm": 1.144921898841858, | |
| "learning_rate": 3.9636171758181657e-07, | |
| "loss": 0.0204, | |
| "num_input_tokens_seen": 4211360, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 4.099758648431215, | |
| "grad_norm": 1.0628600120544434, | |
| "learning_rate": 3.908321304155846e-07, | |
| "loss": 0.0043, | |
| "num_input_tokens_seen": 4218000, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 4.106194690265487, | |
| "grad_norm": 0.03438463807106018, | |
| "learning_rate": 3.853381135398093e-07, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 4224544, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 4.112630732099759, | |
| "grad_norm": 0.22854630649089813, | |
| "learning_rate": 3.798797596089351e-07, | |
| "loss": 0.0009, | |
| "num_input_tokens_seen": 4230992, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 4.119066773934031, | |
| "grad_norm": 0.12790539860725403, | |
| "learning_rate": 3.7445716067596506e-07, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 4237808, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 4.125502815768303, | |
| "grad_norm": 0.040783047676086426, | |
| "learning_rate": 3.6907040819090604e-07, | |
| "loss": 0.0003, | |
| "num_input_tokens_seen": 4244032, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 4.131938857602575, | |
| "grad_norm": 0.29912275075912476, | |
| "learning_rate": 3.63719592999231e-07, | |
| "loss": 0.0015, | |
| "num_input_tokens_seen": 4250640, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 4.1383748994368466, | |
| "grad_norm": 0.1869562268257141, | |
| "learning_rate": 3.5840480534034355e-07, | |
| "loss": 0.0012, | |
| "num_input_tokens_seen": 4257440, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 4.1448109412711185, | |
| "grad_norm": 0.6986035108566284, | |
| "learning_rate": 3.5312613484605546e-07, | |
| "loss": 0.0041, | |
| "num_input_tokens_seen": 4263936, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 4.15124698310539, | |
| "grad_norm": 1.672957181930542, | |
| "learning_rate": 3.4788367053908087e-07, | |
| "loss": 0.0087, | |
| "num_input_tokens_seen": 4270144, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 4.157683024939662, | |
| "grad_norm": 0.5007069110870361, | |
| "learning_rate": 3.4267750083152587e-07, | |
| "loss": 0.0025, | |
| "num_input_tokens_seen": 4276944, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 4.164119066773934, | |
| "grad_norm": 0.2200661450624466, | |
| "learning_rate": 3.375077135234051e-07, | |
| "loss": 0.0012, | |
| "num_input_tokens_seen": 4283488, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 4.170555108608206, | |
| "grad_norm": 1.018943190574646, | |
| "learning_rate": 3.323743958011588e-07, | |
| "loss": 0.0111, | |
| "num_input_tokens_seen": 4290000, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 4.176991150442478, | |
| "grad_norm": 0.5488151907920837, | |
| "learning_rate": 3.2727763423617915e-07, | |
| "loss": 0.0026, | |
| "num_input_tokens_seen": 4296544, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 4.18342719227675, | |
| "grad_norm": 0.12664268910884857, | |
| "learning_rate": 3.222175147833556e-07, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 4303056, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 4.189863234111022, | |
| "grad_norm": 0.04309312626719475, | |
| "learning_rate": 3.171941227796227e-07, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 4309664, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 4.196299275945294, | |
| "grad_norm": 2.9486300945281982, | |
| "learning_rate": 3.122075429425184e-07, | |
| "loss": 0.0118, | |
| "num_input_tokens_seen": 4316112, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 4.202735317779566, | |
| "grad_norm": 0.03176088631153107, | |
| "learning_rate": 3.072578593687606e-07, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4322800, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 4.209171359613838, | |
| "grad_norm": 4.464654445648193, | |
| "learning_rate": 3.0234515553282523e-07, | |
| "loss": 0.0151, | |
| "num_input_tokens_seen": 4329408, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 4.21560740144811, | |
| "grad_norm": 0.017552955076098442, | |
| "learning_rate": 2.9746951428553884e-07, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4335648, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 4.222043443282382, | |
| "grad_norm": 0.385110467672348, | |
| "learning_rate": 2.9263101785268253e-07, | |
| "loss": 0.0019, | |
| "num_input_tokens_seen": 4342256, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 4.228479485116654, | |
| "grad_norm": 0.3891147971153259, | |
| "learning_rate": 2.8782974783360534e-07, | |
| "loss": 0.0009, | |
| "num_input_tokens_seen": 4349280, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 4.2349155269509255, | |
| "grad_norm": 0.687170147895813, | |
| "learning_rate": 2.8306578519984526e-07, | |
| "loss": 0.0051, | |
| "num_input_tokens_seen": 4356128, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 4.2413515687851975, | |
| "grad_norm": 0.16641825437545776, | |
| "learning_rate": 2.783392102937682e-07, | |
| "loss": 0.0008, | |
| "num_input_tokens_seen": 4362672, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 4.247787610619469, | |
| "grad_norm": 0.02807171456515789, | |
| "learning_rate": 2.7365010282720954e-07, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4369440, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 4.254223652453741, | |
| "grad_norm": 1.0298210382461548, | |
| "learning_rate": 2.6899854188013054e-07, | |
| "loss": 0.0043, | |
| "num_input_tokens_seen": 4375760, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 4.260659694288013, | |
| "grad_norm": 0.15670017898082733, | |
| "learning_rate": 2.643846058992866e-07, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 4382768, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 4.267095736122285, | |
| "grad_norm": 1.2815680503845215, | |
| "learning_rate": 2.5980837269690056e-07, | |
| "loss": 0.0092, | |
| "num_input_tokens_seen": 4389424, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 4.273531777956556, | |
| "grad_norm": 0.23917140066623688, | |
| "learning_rate": 2.552699194493549e-07, | |
| "loss": 0.0006, | |
| "num_input_tokens_seen": 4395904, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 4.279967819790828, | |
| "grad_norm": 0.8005861043930054, | |
| "learning_rate": 2.507693226958871e-07, | |
| "loss": 0.0049, | |
| "num_input_tokens_seen": 4402144, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 4.2864038616251, | |
| "grad_norm": 0.8631348609924316, | |
| "learning_rate": 2.463066583372989e-07, | |
| "loss": 0.0058, | |
| "num_input_tokens_seen": 4408672, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 4.292839903459372, | |
| "grad_norm": 0.017498647794127464, | |
| "learning_rate": 2.418820016346779e-07, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4415040, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 4.299275945293644, | |
| "grad_norm": 0.8143237829208374, | |
| "learning_rate": 2.3749542720812757e-07, | |
| "loss": 0.0063, | |
| "num_input_tokens_seen": 4421696, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 4.305711987127916, | |
| "grad_norm": 0.5140169262886047, | |
| "learning_rate": 2.331470090355084e-07, | |
| "loss": 0.0116, | |
| "num_input_tokens_seen": 4428096, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 4.312148028962188, | |
| "grad_norm": 0.6097451448440552, | |
| "learning_rate": 2.2883682045119066e-07, | |
| "loss": 0.003, | |
| "num_input_tokens_seen": 4434752, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 4.31858407079646, | |
| "grad_norm": 0.030739160254597664, | |
| "learning_rate": 2.2456493414481778e-07, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4441584, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 4.325020112630732, | |
| "grad_norm": 0.017091860994696617, | |
| "learning_rate": 2.2033142216007913e-07, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4448464, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 4.331456154465004, | |
| "grad_norm": 0.18128401041030884, | |
| "learning_rate": 2.1613635589349756e-07, | |
| "loss": 0.0008, | |
| "num_input_tokens_seen": 4454976, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 4.337892196299276, | |
| "grad_norm": 0.0346699096262455, | |
| "learning_rate": 2.1197980609322406e-07, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 4461440, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 4.3443282381335475, | |
| "grad_norm": 0.018729638308286667, | |
| "learning_rate": 2.07861842857843e-07, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4468080, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 4.3507642799678194, | |
| "grad_norm": 1.5200186967849731, | |
| "learning_rate": 2.0378253563519247e-07, | |
| "loss": 0.0105, | |
| "num_input_tokens_seen": 4474944, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 4.357200321802091, | |
| "grad_norm": 0.03133641555905342, | |
| "learning_rate": 1.997419532211925e-07, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4481456, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 4.363636363636363, | |
| "grad_norm": 0.012541470117866993, | |
| "learning_rate": 1.9574016375868282e-07, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4487472, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 4.370072405470635, | |
| "grad_norm": 1.6271870136260986, | |
| "learning_rate": 1.9177723473627647e-07, | |
| "loss": 0.0076, | |
| "num_input_tokens_seen": 4494320, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 4.376508447304907, | |
| "grad_norm": 0.03906352072954178, | |
| "learning_rate": 1.8785323298722098e-07, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4501152, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 4.382944489139179, | |
| "grad_norm": 0.024355776607990265, | |
| "learning_rate": 1.839682246882682e-07, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4507376, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 4.389380530973451, | |
| "grad_norm": 0.4699815511703491, | |
| "learning_rate": 1.801222753585638e-07, | |
| "loss": 0.002, | |
| "num_input_tokens_seen": 4513904, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 4.395816572807723, | |
| "grad_norm": 1.0409318208694458, | |
| "learning_rate": 1.7631544985853623e-07, | |
| "loss": 0.0088, | |
| "num_input_tokens_seen": 4520608, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 4.402252614641995, | |
| "grad_norm": 0.42572081089019775, | |
| "learning_rate": 1.725478123888083e-07, | |
| "loss": 0.0023, | |
| "num_input_tokens_seen": 4527184, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 4.408688656476267, | |
| "grad_norm": 0.04756924882531166, | |
| "learning_rate": 1.6881942648911077e-07, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 4533696, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 4.415124698310539, | |
| "grad_norm": 0.21432961523532867, | |
| "learning_rate": 1.6513035503721213e-07, | |
| "loss": 0.0017, | |
| "num_input_tokens_seen": 4540624, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 4.421560740144811, | |
| "grad_norm": 0.035157278180122375, | |
| "learning_rate": 1.614806602478583e-07, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 4547056, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 4.427996781979083, | |
| "grad_norm": 1.0397815704345703, | |
| "learning_rate": 1.5787040367172379e-07, | |
| "loss": 0.0073, | |
| "num_input_tokens_seen": 4553712, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 4.434432823813355, | |
| "grad_norm": 0.8960546851158142, | |
| "learning_rate": 1.542996461943716e-07, | |
| "loss": 0.0013, | |
| "num_input_tokens_seen": 4560080, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 4.4408688656476265, | |
| "grad_norm": 0.1009814515709877, | |
| "learning_rate": 1.507684480352292e-07, | |
| "loss": 0.0003, | |
| "num_input_tokens_seen": 4566496, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 4.447304907481898, | |
| "grad_norm": 1.6075918674468994, | |
| "learning_rate": 1.4727686874657143e-07, | |
| "loss": 0.0149, | |
| "num_input_tokens_seen": 4573152, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 4.45374094931617, | |
| "grad_norm": 0.5051795840263367, | |
| "learning_rate": 1.4382496721251526e-07, | |
| "loss": 0.0026, | |
| "num_input_tokens_seen": 4580432, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 4.460176991150442, | |
| "grad_norm": 0.01903243362903595, | |
| "learning_rate": 1.4041280164802967e-07, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4587024, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 4.466613032984714, | |
| "grad_norm": 0.04848824068903923, | |
| "learning_rate": 1.3704042959795132e-07, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 4593296, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 4.473049074818986, | |
| "grad_norm": 1.155561923980713, | |
| "learning_rate": 1.3370790793601373e-07, | |
| "loss": 0.006, | |
| "num_input_tokens_seen": 4600000, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 4.479485116653258, | |
| "grad_norm": 0.06723422557115555, | |
| "learning_rate": 1.3041529286389078e-07, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 4606560, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 4.48592115848753, | |
| "grad_norm": 0.10645350813865662, | |
| "learning_rate": 1.2716263991024712e-07, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 4613424, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 4.492357200321802, | |
| "grad_norm": 0.09232950955629349, | |
| "learning_rate": 1.2395000392980057e-07, | |
| "loss": 0.0007, | |
| "num_input_tokens_seen": 4620064, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 4.498793242156074, | |
| "grad_norm": 0.04445146396756172, | |
| "learning_rate": 1.2077743910239998e-07, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 4626608, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 4.505229283990346, | |
| "grad_norm": 0.03232429176568985, | |
| "learning_rate": 1.1764499893210879e-07, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4633280, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 4.511665325824618, | |
| "grad_norm": 0.7786117792129517, | |
| "learning_rate": 1.145527362463042e-07, | |
| "loss": 0.0033, | |
| "num_input_tokens_seen": 4639920, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 4.51810136765889, | |
| "grad_norm": 0.15632830560207367, | |
| "learning_rate": 1.1150070319478679e-07, | |
| "loss": 0.0009, | |
| "num_input_tokens_seen": 4646736, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 4.524537409493162, | |
| "grad_norm": 2.964639186859131, | |
| "learning_rate": 1.0848895124889819e-07, | |
| "loss": 0.0216, | |
| "num_input_tokens_seen": 4653328, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 4.530973451327434, | |
| "grad_norm": 0.8460017442703247, | |
| "learning_rate": 1.0551753120065621e-07, | |
| "loss": 0.0035, | |
| "num_input_tokens_seen": 4660112, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 4.5374094931617055, | |
| "grad_norm": 1.1353970766067505, | |
| "learning_rate": 1.0258649316189722e-07, | |
| "loss": 0.0073, | |
| "num_input_tokens_seen": 4666560, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 4.543845534995977, | |
| "grad_norm": 0.08409194648265839, | |
| "learning_rate": 9.969588656342982e-08, | |
| "loss": 0.0003, | |
| "num_input_tokens_seen": 4673152, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 4.550281576830249, | |
| "grad_norm": 0.1840662956237793, | |
| "learning_rate": 9.684576015420277e-08, | |
| "loss": 0.0007, | |
| "num_input_tokens_seen": 4679360, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 4.556717618664521, | |
| "grad_norm": 0.049431972205638885, | |
| "learning_rate": 9.403616200048288e-08, | |
| "loss": 0.0003, | |
| "num_input_tokens_seen": 4685904, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 4.563153660498793, | |
| "grad_norm": 1.309277892112732, | |
| "learning_rate": 9.12671394850423e-08, | |
| "loss": 0.0168, | |
| "num_input_tokens_seen": 4692320, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 4.569589702333065, | |
| "grad_norm": 0.3398638367652893, | |
| "learning_rate": 8.85387393063622e-08, | |
| "loss": 0.0015, | |
| "num_input_tokens_seen": 4698928, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 4.576025744167337, | |
| "grad_norm": 0.016252102330327034, | |
| "learning_rate": 8.585100747784376e-08, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4706000, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 4.582461786001609, | |
| "grad_norm": 0.6447362303733826, | |
| "learning_rate": 8.320398932703145e-08, | |
| "loss": 0.0036, | |
| "num_input_tokens_seen": 4712352, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 4.588897827835881, | |
| "grad_norm": 0.5575013756752014, | |
| "learning_rate": 8.059772949485068e-08, | |
| "loss": 0.0022, | |
| "num_input_tokens_seen": 4718848, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 4.595333869670153, | |
| "grad_norm": 1.0512995719909668, | |
| "learning_rate": 7.803227193485336e-08, | |
| "loss": 0.0085, | |
| "num_input_tokens_seen": 4725728, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 4.601769911504425, | |
| "grad_norm": 0.012490477412939072, | |
| "learning_rate": 7.550765991247655e-08, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4732352, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 4.608205953338697, | |
| "grad_norm": 0.025810543447732925, | |
| "learning_rate": 7.30239360043139e-08, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4738992, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 4.614641995172969, | |
| "grad_norm": 0.5250550508499146, | |
| "learning_rate": 7.058114209739675e-08, | |
| "loss": 0.007, | |
| "num_input_tokens_seen": 4745872, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 4.621078037007241, | |
| "grad_norm": 0.05289880558848381, | |
| "learning_rate": 6.817931938848805e-08, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 4752544, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 4.627514078841513, | |
| "grad_norm": 0.0455067902803421, | |
| "learning_rate": 6.581850838338816e-08, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 4759360, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 4.6339501206757845, | |
| "grad_norm": 0.1569470316171646, | |
| "learning_rate": 6.349874889624963e-08, | |
| "loss": 0.0008, | |
| "num_input_tokens_seen": 4766016, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 4.640386162510056, | |
| "grad_norm": 0.7371820211410522, | |
| "learning_rate": 6.12200800489085e-08, | |
| "loss": 0.0043, | |
| "num_input_tokens_seen": 4772624, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 4.646822204344328, | |
| "grad_norm": 0.09805099666118622, | |
| "learning_rate": 5.898254027022293e-08, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 4778960, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 4.6532582461786, | |
| "grad_norm": 0.6390008926391602, | |
| "learning_rate": 5.678616729542535e-08, | |
| "loss": 0.0024, | |
| "num_input_tokens_seen": 4785600, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 4.659694288012872, | |
| "grad_norm": 0.4965854585170746, | |
| "learning_rate": 5.463099816548578e-08, | |
| "loss": 0.0024, | |
| "num_input_tokens_seen": 4792208, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 4.666130329847144, | |
| "grad_norm": 0.012886490672826767, | |
| "learning_rate": 5.2517069226488694e-08, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4798816, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 4.672566371681416, | |
| "grad_norm": 0.04072566702961922, | |
| "learning_rate": 5.044441612901768e-08, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4805408, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 4.679002413515688, | |
| "grad_norm": 0.07961362600326538, | |
| "learning_rate": 4.841307382755567e-08, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 4812480, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 4.68543845534996, | |
| "grad_norm": 0.006482974626123905, | |
| "learning_rate": 4.6423076579895646e-08, | |
| "loss": 0.0, | |
| "num_input_tokens_seen": 4819360, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 4.691874497184232, | |
| "grad_norm": 0.06561946123838425, | |
| "learning_rate": 4.4474457946562245e-08, | |
| "loss": 0.0003, | |
| "num_input_tokens_seen": 4825904, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 4.698310539018504, | |
| "grad_norm": 0.10200546681880951, | |
| "learning_rate": 4.256725079024554e-08, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 4832544, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 4.704746580852776, | |
| "grad_norm": 0.2905844449996948, | |
| "learning_rate": 4.070148727524814e-08, | |
| "loss": 0.001, | |
| "num_input_tokens_seen": 4838960, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 4.711182622687048, | |
| "grad_norm": 0.013804874382913113, | |
| "learning_rate": 3.887719886694091e-08, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4845584, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 4.71761866452132, | |
| "grad_norm": 0.8108282685279846, | |
| "learning_rate": 3.709441633123367e-08, | |
| "loss": 0.0031, | |
| "num_input_tokens_seen": 4852416, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 4.7240547063555915, | |
| "grad_norm": 0.11679325252771378, | |
| "learning_rate": 3.535316973405672e-08, | |
| "loss": 0.0006, | |
| "num_input_tokens_seen": 4858864, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 4.7304907481898635, | |
| "grad_norm": 0.013299252837896347, | |
| "learning_rate": 3.3653488440851255e-08, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4865552, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 4.736926790024135, | |
| "grad_norm": 0.8139033913612366, | |
| "learning_rate": 3.1995401116077516e-08, | |
| "loss": 0.006, | |
| "num_input_tokens_seen": 4871984, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 4.743362831858407, | |
| "grad_norm": 0.023770008236169815, | |
| "learning_rate": 3.037893572272937e-08, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4878688, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 4.749798873692679, | |
| "grad_norm": 0.1465252786874771, | |
| "learning_rate": 2.8804119521862183e-08, | |
| "loss": 0.001, | |
| "num_input_tokens_seen": 4885504, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 4.756234915526951, | |
| "grad_norm": 0.7871643304824829, | |
| "learning_rate": 2.7270979072135106e-08, | |
| "loss": 0.0019, | |
| "num_input_tokens_seen": 4892272, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 4.762670957361223, | |
| "grad_norm": 0.6069658994674683, | |
| "learning_rate": 2.5779540229361744e-08, | |
| "loss": 0.0047, | |
| "num_input_tokens_seen": 4898848, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 4.769106999195495, | |
| "grad_norm": 0.07687046378850937, | |
| "learning_rate": 2.4329828146074096e-08, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 4905392, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 4.775543041029767, | |
| "grad_norm": 0.0760512426495552, | |
| "learning_rate": 2.2921867271099296e-08, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 4912672, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 4.781979082864039, | |
| "grad_norm": 0.14020369946956635, | |
| "learning_rate": 2.155568134914604e-08, | |
| "loss": 0.0006, | |
| "num_input_tokens_seen": 4919472, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 4.788415124698311, | |
| "grad_norm": 0.020505385473370552, | |
| "learning_rate": 2.0231293420405194e-08, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4926448, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 4.794851166532583, | |
| "grad_norm": 0.6544126868247986, | |
| "learning_rate": 1.8948725820160663e-08, | |
| "loss": 0.0038, | |
| "num_input_tokens_seen": 4933152, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 4.801287208366855, | |
| "grad_norm": 1.0113900899887085, | |
| "learning_rate": 1.770800017841301e-08, | |
| "loss": 0.0088, | |
| "num_input_tokens_seen": 4939568, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 4.807723250201127, | |
| "grad_norm": 0.05956251546740532, | |
| "learning_rate": 1.650913741951421e-08, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 4946224, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 4.814159292035399, | |
| "grad_norm": 0.21396887302398682, | |
| "learning_rate": 1.5352157761815978e-08, | |
| "loss": 0.002, | |
| "num_input_tokens_seen": 4952880, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 4.8205953338696705, | |
| "grad_norm": 0.3376445472240448, | |
| "learning_rate": 1.4237080717326712e-08, | |
| "loss": 0.0012, | |
| "num_input_tokens_seen": 4960288, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 4.8270313757039425, | |
| "grad_norm": 0.47905248403549194, | |
| "learning_rate": 1.3163925091384532e-08, | |
| "loss": 0.0038, | |
| "num_input_tokens_seen": 4966656, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 4.833467417538214, | |
| "grad_norm": 0.08333203196525574, | |
| "learning_rate": 1.2132708982338925e-08, | |
| "loss": 0.0003, | |
| "num_input_tokens_seen": 4973184, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 4.839903459372486, | |
| "grad_norm": 0.45923787355422974, | |
| "learning_rate": 1.1143449781245985e-08, | |
| "loss": 0.0023, | |
| "num_input_tokens_seen": 4980080, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 4.846339501206758, | |
| "grad_norm": 0.038865748792886734, | |
| "learning_rate": 1.0196164171574762e-08, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 4987104, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 4.85277554304103, | |
| "grad_norm": 0.2948670983314514, | |
| "learning_rate": 9.290868128926378e-09, | |
| "loss": 0.0044, | |
| "num_input_tokens_seen": 4993728, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 4.859211584875302, | |
| "grad_norm": 0.5785093903541565, | |
| "learning_rate": 8.427576920763957e-09, | |
| "loss": 0.004, | |
| "num_input_tokens_seen": 5000368, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 4.865647626709574, | |
| "grad_norm": 0.29199209809303284, | |
| "learning_rate": 7.606305106155898e-09, | |
| "loss": 0.0042, | |
| "num_input_tokens_seen": 5006768, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 4.872083668543846, | |
| "grad_norm": 0.25938865542411804, | |
| "learning_rate": 6.827066535529947e-09, | |
| "loss": 0.0004, | |
| "num_input_tokens_seen": 5013264, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 4.878519710378118, | |
| "grad_norm": 0.1507510095834732, | |
| "learning_rate": 6.089874350439507e-09, | |
| "loss": 0.0005, | |
| "num_input_tokens_seen": 5019744, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 4.88495575221239, | |
| "grad_norm": 0.009750754572451115, | |
| "learning_rate": 5.394740983341862e-09, | |
| "loss": 0.0, | |
| "num_input_tokens_seen": 5026080, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 4.891391794046662, | |
| "grad_norm": 0.049791790544986725, | |
| "learning_rate": 4.74167815738974e-09, | |
| "loss": 0.0001, | |
| "num_input_tokens_seen": 5032544, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 4.897827835880933, | |
| "grad_norm": 0.971774160861969, | |
| "learning_rate": 4.130696886231744e-09, | |
| "loss": 0.0089, | |
| "num_input_tokens_seen": 5039248, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 4.904263877715205, | |
| "grad_norm": 0.9287542700767517, | |
| "learning_rate": 3.561807473827783e-09, | |
| "loss": 0.0118, | |
| "num_input_tokens_seen": 5045792, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 4.910699919549477, | |
| "grad_norm": 0.06942977011203766, | |
| "learning_rate": 3.035019514275317e-09, | |
| "loss": 0.0003, | |
| "num_input_tokens_seen": 5052320, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 4.917135961383749, | |
| "grad_norm": 0.28999796509742737, | |
| "learning_rate": 2.5503418916464352e-09, | |
| "loss": 0.0013, | |
| "num_input_tokens_seen": 5059200, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 4.923572003218021, | |
| "grad_norm": 0.8473367691040039, | |
| "learning_rate": 2.1077827798404728e-09, | |
| "loss": 0.0058, | |
| "num_input_tokens_seen": 5065824, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 4.9300080450522925, | |
| "grad_norm": 0.14826533198356628, | |
| "learning_rate": 1.707349642442735e-09, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 5072080, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 4.936444086886564, | |
| "grad_norm": 0.9073830246925354, | |
| "learning_rate": 1.349049232601818e-09, | |
| "loss": 0.0059, | |
| "num_input_tokens_seen": 5078336, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 4.942880128720836, | |
| "grad_norm": 1.048936367034912, | |
| "learning_rate": 1.0328875929138671e-09, | |
| "loss": 0.0068, | |
| "num_input_tokens_seen": 5085152, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 4.949316170555108, | |
| "grad_norm": 0.06418836861848831, | |
| "learning_rate": 7.588700553209926e-10, | |
| "loss": 0.0003, | |
| "num_input_tokens_seen": 5092080, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 4.95575221238938, | |
| "grad_norm": 2.129972457885742, | |
| "learning_rate": 5.270012410216185e-10, | |
| "loss": 0.0251, | |
| "num_input_tokens_seen": 5098960, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 4.962188254223652, | |
| "grad_norm": 0.3871181309223175, | |
| "learning_rate": 3.3728506039276686e-10, | |
| "loss": 0.0054, | |
| "num_input_tokens_seen": 5105648, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 4.968624296057924, | |
| "grad_norm": 0.185493603348732, | |
| "learning_rate": 1.8972471292344474e-10, | |
| "loss": 0.0007, | |
| "num_input_tokens_seen": 5112096, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 4.975060337892196, | |
| "grad_norm": 0.05420377105474472, | |
| "learning_rate": 8.432268716135338e-11, | |
| "loss": 0.0002, | |
| "num_input_tokens_seen": 5118960, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 4.981496379726468, | |
| "grad_norm": 0.5486555695533752, | |
| "learning_rate": 2.108076067014464e-11, | |
| "loss": 0.0032, | |
| "num_input_tokens_seen": 5125872, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 4.98793242156074, | |
| "grad_norm": 1.0398619174957275, | |
| "learning_rate": 0.0, | |
| "loss": 0.0107, | |
| "num_input_tokens_seen": 5132288, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 4.98793242156074, | |
| "num_input_tokens_seen": 5132288, | |
| "step": 775, | |
| "total_flos": 2.3110461174474342e+17, | |
| "train_loss": 0.14078616270634925, | |
| "train_runtime": 10317.9615, | |
| "train_samples_per_second": 9.634, | |
| "train_steps_per_second": 0.075 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 775, | |
| "num_input_tokens_seen": 5132288, | |
| "num_train_epochs": 5, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.3110461174474342e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |