{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.023631723225257587,
  "eval_steps": 1000,
  "global_step": 125,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 0.0,
      "loss": 13.5045,
      "step": 1
    },
    {
      "epoch": 0.0,
      "learning_rate": 4e-05,
      "loss": 13.2514,
      "step": 2
    },
    {
      "epoch": 0.0,
      "learning_rate": 4e-05,
      "loss": 14.3156,
      "step": 3
    },
    {
      "epoch": 0.0,
      "learning_rate": 4e-05,
      "loss": 13.0763,
      "step": 4
    },
    {
      "epoch": 0.0,
      "learning_rate": 8e-05,
      "loss": 12.9353,
      "step": 5
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00012,
      "loss": 12.8562,
      "step": 6
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00016,
      "loss": 12.6139,
      "step": 7
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.0002,
      "loss": 12.8862,
      "step": 8
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.0002,
      "loss": 12.0442,
      "step": 9
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.0001995967741935484,
      "loss": 11.959,
      "step": 10
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.0001991935483870968,
      "loss": 11.7023,
      "step": 11
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00019879032258064515,
      "loss": 10.7218,
      "step": 12
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00019838709677419357,
      "loss": 10.9581,
      "step": 13
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00019798387096774193,
      "loss": 11.3065,
      "step": 14
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00019758064516129035,
      "loss": 10.2943,
      "step": 15
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.0001971774193548387,
      "loss": 10.1651,
      "step": 16
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.0001967741935483871,
      "loss": 9.8951,
      "step": 17
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.0001963709677419355,
      "loss": 9.6734,
      "step": 18
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00019596774193548388,
      "loss": 9.4096,
      "step": 19
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00019556451612903227,
      "loss": 9.0653,
      "step": 20
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00019516129032258066,
      "loss": 8.9835,
      "step": 21
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00019475806451612905,
      "loss": 9.399,
      "step": 22
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00019435483870967744,
      "loss": 9.1367,
      "step": 23
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.0001939516129032258,
      "loss": 8.4746,
      "step": 24
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00019354838709677422,
      "loss": 8.4179,
      "step": 25
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00019314516129032258,
      "loss": 8.6536,
      "step": 26
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.000192741935483871,
      "loss": 8.2193,
      "step": 27
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00019233870967741936,
      "loss": 8.3287,
      "step": 28
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00019193548387096775,
      "loss": 8.1268,
      "step": 29
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00019153225806451614,
      "loss": 8.2402,
      "step": 30
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00019112903225806453,
      "loss": 7.9379,
      "step": 31
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00019072580645161291,
      "loss": 8.2621,
      "step": 32
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0001903225806451613,
      "loss": 8.0542,
      "step": 33
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0001899193548387097,
      "loss": 7.9747,
      "step": 34
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018951612903225808,
      "loss": 7.9519,
      "step": 35
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018911290322580645,
      "loss": 7.7444,
      "step": 36
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018870967741935486,
      "loss": 7.9391,
      "step": 37
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018830645161290322,
      "loss": 7.9026,
      "step": 38
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018790322580645164,
      "loss": 7.8992,
      "step": 39
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0001875,
      "loss": 7.7428,
      "step": 40
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0001870967741935484,
      "loss": 7.7166,
      "step": 41
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018669354838709678,
      "loss": 7.7254,
      "step": 42
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018629032258064517,
      "loss": 7.5929,
      "step": 43
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018588709677419356,
      "loss": 7.4957,
      "step": 44
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018548387096774192,
      "loss": 7.805,
      "step": 45
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018508064516129034,
      "loss": 7.91,
      "step": 46
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0001846774193548387,
      "loss": 7.6839,
      "step": 47
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018427419354838712,
      "loss": 7.7526,
      "step": 48
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018387096774193548,
      "loss": 7.9365,
      "step": 49
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018346774193548387,
      "loss": 7.455,
      "step": 50
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018306451612903226,
      "loss": 7.5666,
      "step": 51
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018266129032258065,
      "loss": 7.5442,
      "step": 52
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018225806451612904,
      "loss": 7.7723,
      "step": 53
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018185483870967743,
      "loss": 7.5818,
      "step": 54
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018145161290322582,
      "loss": 7.5512,
      "step": 55
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0001810483870967742,
      "loss": 7.7454,
      "step": 56
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00018064516129032257,
      "loss": 7.5012,
      "step": 57
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.000180241935483871,
      "loss": 7.6233,
      "step": 58
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017983870967741935,
      "loss": 7.6972,
      "step": 59
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017943548387096777,
      "loss": 7.6155,
      "step": 60
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017903225806451613,
      "loss": 7.4599,
      "step": 61
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017862903225806452,
      "loss": 7.7154,
      "step": 62
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0001782258064516129,
      "loss": 7.598,
      "step": 63
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0001778225806451613,
      "loss": 7.4523,
      "step": 64
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0001774193548387097,
      "loss": 7.5589,
      "step": 65
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017701612903225808,
      "loss": 7.4396,
      "step": 66
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017661290322580647,
      "loss": 7.6101,
      "step": 67
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017620967741935486,
      "loss": 7.5708,
      "step": 68
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017580645161290325,
      "loss": 7.3261,
      "step": 69
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017540322580645164,
      "loss": 7.4395,
      "step": 70
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.000175,
      "loss": 7.2618,
      "step": 71
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0001745967741935484,
      "loss": 7.1965,
      "step": 72
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017419354838709678,
      "loss": 7.1342,
      "step": 73
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017379032258064517,
      "loss": 7.1008,
      "step": 74
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017338709677419356,
      "loss": 7.0397,
      "step": 75
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017298387096774195,
      "loss": 7.0406,
      "step": 76
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017258064516129034,
      "loss": 6.9318,
      "step": 77
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0001721774193548387,
      "loss": 6.9892,
      "step": 78
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00017177419354838711,
      "loss": 6.5501,
      "step": 79
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00017137096774193548,
      "loss": 6.8337,
      "step": 80
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0001709677419354839,
      "loss": 6.4362,
      "step": 81
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00017056451612903226,
      "loss": 6.1646,
      "step": 82
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00017016129032258065,
      "loss": 6.3416,
      "step": 83
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016975806451612904,
      "loss": 6.3828,
      "step": 84
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016935483870967742,
      "loss": 6.4853,
      "step": 85
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016895161290322581,
      "loss": 5.7598,
      "step": 86
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0001685483870967742,
      "loss": 6.3524,
      "step": 87
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0001681451612903226,
      "loss": 5.8323,
      "step": 88
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016774193548387098,
      "loss": 6.16,
      "step": 89
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016733870967741935,
      "loss": 5.9581,
      "step": 90
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016693548387096776,
      "loss": 5.741,
      "step": 91
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016653225806451612,
      "loss": 6.0302,
      "step": 92
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016612903225806454,
      "loss": 5.6973,
      "step": 93
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0001657258064516129,
      "loss": 6.1547,
      "step": 94
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016532258064516132,
      "loss": 5.5362,
      "step": 95
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016491935483870968,
      "loss": 5.8757,
      "step": 96
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016451612903225807,
      "loss": 5.1669,
      "step": 97
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016411290322580646,
      "loss": 5.6761,
      "step": 98
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016370967741935485,
      "loss": 5.3759,
      "step": 99
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016330645161290324,
      "loss": 5.6455,
      "step": 100
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016290322580645163,
      "loss": 5.5912,
      "step": 101
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016250000000000002,
      "loss": 5.3943,
      "step": 102
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016209677419354838,
      "loss": 5.5953,
      "step": 103
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016169354838709677,
      "loss": 5.4715,
      "step": 104
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016129032258064516,
      "loss": 5.2222,
      "step": 105
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016088709677419355,
      "loss": 5.3243,
      "step": 106
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016048387096774194,
      "loss": 4.9239,
      "step": 107
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00016008064516129033,
      "loss": 5.232,
      "step": 108
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00015967741935483872,
      "loss": 5.3394,
      "step": 109
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0001592741935483871,
      "loss": 4.5978,
      "step": 110
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00015887096774193547,
      "loss": 5.0104,
      "step": 111
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0001584677419354839,
      "loss": 5.2945,
      "step": 112
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00015806451612903225,
      "loss": 4.9366,
      "step": 113
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00015766129032258067,
      "loss": 4.8425,
      "step": 114
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00015725806451612903,
      "loss": 5.0086,
      "step": 115
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00015685483870967742,
      "loss": 4.7297,
      "step": 116
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0001564516129032258,
      "loss": 4.834,
      "step": 117
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0001560483870967742,
      "loss": 4.9643,
      "step": 118
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0001556451612903226,
      "loss": 4.7391,
      "step": 119
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00015524193548387098,
      "loss": 4.8393,
      "step": 120
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00015483870967741937,
      "loss": 4.9257,
      "step": 121
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00015443548387096776,
      "loss": 5.0117,
      "step": 122
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00015403225806451615,
      "loss": 4.643,
      "step": 123
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00015362903225806454,
      "loss": 5.0408,
      "step": 124
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0001532258064516129,
      "loss": 4.9834,
      "step": 125
    }
  ],
  "logging_steps": 1,
  "max_steps": 501,
  "num_train_epochs": 1,
  "save_steps": 25,
  "total_flos": 184698227957760.0,
  "trial_name": null,
  "trial_params": null
}