| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.214190093708166, |
| "eval_steps": 500, |
| "global_step": 100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00214190093708166, |
| "grad_norm": 2.276703357696533, |
| "learning_rate": 5e-05, |
| "loss": 2.8976, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00428380187416332, |
| "grad_norm": 2.1879231929779053, |
| "learning_rate": 0.0001, |
| "loss": 2.9318, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00642570281124498, |
| "grad_norm": 2.1265320777893066, |
| "learning_rate": 0.00015, |
| "loss": 2.8007, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00856760374832664, |
| "grad_norm": 1.979961633682251, |
| "learning_rate": 0.0002, |
| "loss": 2.4979, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0107095046854083, |
| "grad_norm": 1.4055116176605225, |
| "learning_rate": 0.00025, |
| "loss": 2.2273, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01285140562248996, |
| "grad_norm": 0.9101312160491943, |
| "learning_rate": 0.0003, |
| "loss": 1.9513, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01499330655957162, |
| "grad_norm": 1.0588798522949219, |
| "learning_rate": 0.00035, |
| "loss": 1.9132, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01713520749665328, |
| "grad_norm": 0.9746605157852173, |
| "learning_rate": 0.0004, |
| "loss": 1.8825, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01927710843373494, |
| "grad_norm": 0.6882264614105225, |
| "learning_rate": 0.00045000000000000004, |
| "loss": 1.8363, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0214190093708166, |
| "grad_norm": 0.6719866394996643, |
| "learning_rate": 0.0005, |
| "loss": 1.8315, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02356091030789826, |
| "grad_norm": 0.5078733563423157, |
| "learning_rate": 0.0004989035087719298, |
| "loss": 1.7802, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02570281124497992, |
| "grad_norm": 0.5650719404220581, |
| "learning_rate": 0.0004978070175438597, |
| "loss": 1.766, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.027844712182061578, |
| "grad_norm": 0.8591753244400024, |
| "learning_rate": 0.0004967105263157895, |
| "loss": 1.7927, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.02998661311914324, |
| "grad_norm": 0.7662492990493774, |
| "learning_rate": 0.0004956140350877193, |
| "loss": 1.7698, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0321285140562249, |
| "grad_norm": 0.4786683917045593, |
| "learning_rate": 0.0004945175438596491, |
| "loss": 1.7588, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03427041499330656, |
| "grad_norm": 0.35064077377319336, |
| "learning_rate": 0.000493421052631579, |
| "loss": 1.7248, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03641231593038822, |
| "grad_norm": 0.3991241157054901, |
| "learning_rate": 0.0004923245614035088, |
| "loss": 1.7121, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03855421686746988, |
| "grad_norm": 0.31328240036964417, |
| "learning_rate": 0.0004912280701754386, |
| "loss": 1.6815, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04069611780455154, |
| "grad_norm": 0.3583672046661377, |
| "learning_rate": 0.0004901315789473684, |
| "loss": 1.7442, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0428380187416332, |
| "grad_norm": 0.3154083788394928, |
| "learning_rate": 0.0004890350877192983, |
| "loss": 1.6774, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04497991967871486, |
| "grad_norm": 0.3431992530822754, |
| "learning_rate": 0.0004879385964912281, |
| "loss": 1.6866, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.04712182061579652, |
| "grad_norm": 0.2965812385082245, |
| "learning_rate": 0.0004868421052631579, |
| "loss": 1.683, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.04926372155287818, |
| "grad_norm": 0.33691295981407166, |
| "learning_rate": 0.0004857456140350877, |
| "loss": 1.7191, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.05140562248995984, |
| "grad_norm": 0.26985007524490356, |
| "learning_rate": 0.00048464912280701757, |
| "loss": 1.6602, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0535475234270415, |
| "grad_norm": 0.3060872256755829, |
| "learning_rate": 0.00048355263157894734, |
| "loss": 1.671, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.055689424364123156, |
| "grad_norm": 0.2901049852371216, |
| "learning_rate": 0.0004824561403508772, |
| "loss": 1.6659, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.05783132530120482, |
| "grad_norm": 0.2690636217594147, |
| "learning_rate": 0.00048135964912280704, |
| "loss": 1.6627, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.05997322623828648, |
| "grad_norm": 0.2978563904762268, |
| "learning_rate": 0.00048026315789473687, |
| "loss": 1.6819, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.062115127175368136, |
| "grad_norm": 0.2706882953643799, |
| "learning_rate": 0.0004791666666666667, |
| "loss": 1.6148, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0642570281124498, |
| "grad_norm": 0.32384687662124634, |
| "learning_rate": 0.00047807017543859647, |
| "loss": 1.6874, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06639892904953146, |
| "grad_norm": 0.27521777153015137, |
| "learning_rate": 0.00047697368421052635, |
| "loss": 1.6337, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.06854082998661312, |
| "grad_norm": 0.3605692684650421, |
| "learning_rate": 0.0004758771929824561, |
| "loss": 1.7127, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.07068273092369477, |
| "grad_norm": 0.30055901408195496, |
| "learning_rate": 0.000474780701754386, |
| "loss": 1.6319, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.07282463186077644, |
| "grad_norm": 0.30625537037849426, |
| "learning_rate": 0.00047368421052631577, |
| "loss": 1.5903, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0749665327978581, |
| "grad_norm": 0.2736397385597229, |
| "learning_rate": 0.00047258771929824565, |
| "loss": 1.6318, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.07710843373493977, |
| "grad_norm": 0.3148088753223419, |
| "learning_rate": 0.0004714912280701755, |
| "loss": 1.6613, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.07925033467202142, |
| "grad_norm": 0.28139254450798035, |
| "learning_rate": 0.00047039473684210524, |
| "loss": 1.6213, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.08139223560910308, |
| "grad_norm": 0.27580636739730835, |
| "learning_rate": 0.0004692982456140351, |
| "loss": 1.6694, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.08353413654618475, |
| "grad_norm": 0.26056671142578125, |
| "learning_rate": 0.0004682017543859649, |
| "loss": 1.5788, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0856760374832664, |
| "grad_norm": 0.2735174596309662, |
| "learning_rate": 0.0004671052631578948, |
| "loss": 1.5878, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08781793842034806, |
| "grad_norm": 0.2627701461315155, |
| "learning_rate": 0.00046600877192982455, |
| "loss": 1.5928, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.08995983935742972, |
| "grad_norm": 0.2687063217163086, |
| "learning_rate": 0.00046491228070175437, |
| "loss": 1.6034, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09210174029451138, |
| "grad_norm": 0.26949378848075867, |
| "learning_rate": 0.00046381578947368425, |
| "loss": 1.63, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.09424364123159304, |
| "grad_norm": 0.30868563055992126, |
| "learning_rate": 0.000462719298245614, |
| "loss": 1.6904, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.0963855421686747, |
| "grad_norm": 0.28212249279022217, |
| "learning_rate": 0.0004616228070175439, |
| "loss": 1.6194, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.09852744310575635, |
| "grad_norm": 0.308405339717865, |
| "learning_rate": 0.0004605263157894737, |
| "loss": 1.5981, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.10066934404283802, |
| "grad_norm": 0.27064043283462524, |
| "learning_rate": 0.00045942982456140355, |
| "loss": 1.6407, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.10281124497991968, |
| "grad_norm": 0.3150207996368408, |
| "learning_rate": 0.0004583333333333333, |
| "loss": 1.6053, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.10495314591700133, |
| "grad_norm": 0.27907341718673706, |
| "learning_rate": 0.00045723684210526315, |
| "loss": 1.6318, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.107095046854083, |
| "grad_norm": 0.277063250541687, |
| "learning_rate": 0.000456140350877193, |
| "loss": 1.6621, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.10923694779116466, |
| "grad_norm": 0.2763706147670746, |
| "learning_rate": 0.0004550438596491228, |
| "loss": 1.6968, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.11137884872824631, |
| "grad_norm": 0.2664422392845154, |
| "learning_rate": 0.0004539473684210527, |
| "loss": 1.5741, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.11352074966532798, |
| "grad_norm": 0.2769760191440582, |
| "learning_rate": 0.00045285087719298245, |
| "loss": 1.594, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.11566265060240964, |
| "grad_norm": 0.26781827211380005, |
| "learning_rate": 0.00045175438596491233, |
| "loss": 1.6207, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.11780455153949129, |
| "grad_norm": 0.2635057270526886, |
| "learning_rate": 0.0004506578947368421, |
| "loss": 1.5854, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.11994645247657296, |
| "grad_norm": 0.26083359122276306, |
| "learning_rate": 0.00044956140350877193, |
| "loss": 1.625, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.12208835341365462, |
| "grad_norm": 0.29161372780799866, |
| "learning_rate": 0.00044846491228070175, |
| "loss": 1.5673, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.12423025435073627, |
| "grad_norm": 0.2713761031627655, |
| "learning_rate": 0.0004473684210526316, |
| "loss": 1.599, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.12637215528781795, |
| "grad_norm": 0.2928602397441864, |
| "learning_rate": 0.00044627192982456146, |
| "loss": 1.6004, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.1285140562248996, |
| "grad_norm": 0.27757158875465393, |
| "learning_rate": 0.00044517543859649123, |
| "loss": 1.6166, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.13065595716198125, |
| "grad_norm": 0.276129812002182, |
| "learning_rate": 0.00044407894736842106, |
| "loss": 1.5621, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.13279785809906292, |
| "grad_norm": 0.28425729274749756, |
| "learning_rate": 0.0004429824561403509, |
| "loss": 1.5909, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.13493975903614458, |
| "grad_norm": 0.2670615315437317, |
| "learning_rate": 0.0004418859649122807, |
| "loss": 1.586, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.13708165997322624, |
| "grad_norm": 0.28098562359809875, |
| "learning_rate": 0.00044078947368421053, |
| "loss": 1.5765, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.1392235609103079, |
| "grad_norm": 0.2926560938358307, |
| "learning_rate": 0.00043969298245614036, |
| "loss": 1.6543, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.14136546184738955, |
| "grad_norm": 0.3022347688674927, |
| "learning_rate": 0.0004385964912280702, |
| "loss": 1.5576, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.1435073627844712, |
| "grad_norm": 0.293224036693573, |
| "learning_rate": 0.0004375, |
| "loss": 1.6057, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.14564926372155287, |
| "grad_norm": 0.2796039581298828, |
| "learning_rate": 0.00043640350877192983, |
| "loss": 1.5884, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.14779116465863454, |
| "grad_norm": 0.2742992639541626, |
| "learning_rate": 0.00043530701754385966, |
| "loss": 1.5917, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.1499330655957162, |
| "grad_norm": 0.26762816309928894, |
| "learning_rate": 0.0004342105263157895, |
| "loss": 1.6002, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.15207496653279787, |
| "grad_norm": 0.2658008337020874, |
| "learning_rate": 0.0004331140350877193, |
| "loss": 1.5943, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.15421686746987953, |
| "grad_norm": 0.2861701250076294, |
| "learning_rate": 0.00043201754385964914, |
| "loss": 1.561, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.15635876840696117, |
| "grad_norm": 0.27908167243003845, |
| "learning_rate": 0.00043092105263157896, |
| "loss": 1.5311, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.15850066934404283, |
| "grad_norm": 0.2818601131439209, |
| "learning_rate": 0.0004298245614035088, |
| "loss": 1.5793, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.1606425702811245, |
| "grad_norm": 0.2770632207393646, |
| "learning_rate": 0.00042872807017543856, |
| "loss": 1.6198, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.16278447121820616, |
| "grad_norm": 0.273129940032959, |
| "learning_rate": 0.00042763157894736844, |
| "loss": 1.5565, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.16492637215528783, |
| "grad_norm": 0.2815646827220917, |
| "learning_rate": 0.00042653508771929826, |
| "loss": 1.6014, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.1670682730923695, |
| "grad_norm": 0.2931772470474243, |
| "learning_rate": 0.0004254385964912281, |
| "loss": 1.5793, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.16921017402945113, |
| "grad_norm": 0.30830061435699463, |
| "learning_rate": 0.0004243421052631579, |
| "loss": 1.6039, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.1713520749665328, |
| "grad_norm": 0.27675527334213257, |
| "learning_rate": 0.0004232456140350877, |
| "loss": 1.5879, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.17349397590361446, |
| "grad_norm": 0.3058152496814728, |
| "learning_rate": 0.00042214912280701757, |
| "loss": 1.5907, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.17563587684069612, |
| "grad_norm": 0.284934401512146, |
| "learning_rate": 0.00042105263157894734, |
| "loss": 1.5597, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 0.2928430736064911, |
| "learning_rate": 0.0004199561403508772, |
| "loss": 1.6047, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.17991967871485945, |
| "grad_norm": 0.2680971026420593, |
| "learning_rate": 0.00041885964912280704, |
| "loss": 1.5193, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.18206157965194109, |
| "grad_norm": 0.2945151627063751, |
| "learning_rate": 0.00041776315789473687, |
| "loss": 1.602, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.18420348058902275, |
| "grad_norm": 0.2915259599685669, |
| "learning_rate": 0.0004166666666666667, |
| "loss": 1.577, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.18634538152610441, |
| "grad_norm": 0.2818587124347687, |
| "learning_rate": 0.00041557017543859646, |
| "loss": 1.6168, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.18848728246318608, |
| "grad_norm": 0.2791883051395416, |
| "learning_rate": 0.00041447368421052634, |
| "loss": 1.5776, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.19062918340026774, |
| "grad_norm": 0.27820897102355957, |
| "learning_rate": 0.0004133771929824561, |
| "loss": 1.5652, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.1927710843373494, |
| "grad_norm": 0.3035961091518402, |
| "learning_rate": 0.000412280701754386, |
| "loss": 1.5602, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.19491298527443104, |
| "grad_norm": 0.28438517451286316, |
| "learning_rate": 0.00041118421052631577, |
| "loss": 1.5948, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.1970548862115127, |
| "grad_norm": 0.28583455085754395, |
| "learning_rate": 0.00041008771929824565, |
| "loss": 1.5382, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.19919678714859437, |
| "grad_norm": 0.2928229570388794, |
| "learning_rate": 0.00040899122807017547, |
| "loss": 1.551, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.20133868808567604, |
| "grad_norm": 0.29007840156555176, |
| "learning_rate": 0.00040789473684210524, |
| "loss": 1.4906, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.2034805890227577, |
| "grad_norm": 0.2896357774734497, |
| "learning_rate": 0.0004067982456140351, |
| "loss": 1.578, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.20562248995983937, |
| "grad_norm": 0.2787824273109436, |
| "learning_rate": 0.0004057017543859649, |
| "loss": 1.5659, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.20776439089692103, |
| "grad_norm": 0.27766159176826477, |
| "learning_rate": 0.0004046052631578948, |
| "loss": 1.5615, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.20990629183400267, |
| "grad_norm": 0.28936296701431274, |
| "learning_rate": 0.00040350877192982455, |
| "loss": 1.5503, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.21204819277108433, |
| "grad_norm": 0.27507284283638, |
| "learning_rate": 0.00040241228070175437, |
| "loss": 1.4995, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.214190093708166, |
| "grad_norm": 0.29356110095977783, |
| "learning_rate": 0.00040131578947368425, |
| "loss": 1.5048, |
| "step": 100 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 466, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.796154773307392e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|