{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9626955475330926,
  "eval_steps": 500,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 0.00019902912621359224,
      "loss": 2.6183,
      "step": 1
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019805825242718447,
      "loss": 2.2731,
      "step": 2
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0001970873786407767,
      "loss": 2.4264,
      "step": 3
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00019611650485436895,
      "loss": 2.3803,
      "step": 4
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00019514563106796118,
      "loss": 2.2789,
      "step": 5
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0001941747572815534,
      "loss": 2.5586,
      "step": 6
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00019320388349514564,
      "loss": 2.3255,
      "step": 7
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019223300970873787,
      "loss": 2.2983,
      "step": 8
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0001912621359223301,
      "loss": 2.1903,
      "step": 9
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00019029126213592236,
      "loss": 2.3516,
      "step": 10
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00018932038834951458,
      "loss": 2.215,
      "step": 11
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00018834951456310681,
      "loss": 2.2354,
      "step": 12
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00018737864077669904,
      "loss": 2.2487,
      "step": 13
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00018640776699029127,
      "loss": 2.1957,
      "step": 14
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0001854368932038835,
      "loss": 2.2036,
      "step": 15
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00018446601941747576,
      "loss": 2.1787,
      "step": 16
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00018349514563106799,
      "loss": 2.1839,
      "step": 17
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00018252427184466022,
      "loss": 2.1533,
      "step": 18
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00018155339805825244,
      "loss": 2.2554,
      "step": 19
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00018058252427184467,
      "loss": 2.2778,
      "step": 20
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0001796116504854369,
      "loss": 2.382,
      "step": 21
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00017864077669902913,
      "loss": 2.0803,
      "step": 22
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0001776699029126214,
      "loss": 2.26,
      "step": 23
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00017669902912621362,
      "loss": 2.2557,
      "step": 24
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00017572815533980585,
      "loss": 2.0614,
      "step": 25
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00017475728155339805,
      "loss": 2.1342,
      "step": 26
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00017378640776699028,
      "loss": 2.1781,
      "step": 27
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00017281553398058253,
      "loss": 2.1828,
      "step": 28
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00017184466019417476,
      "loss": 2.0557,
      "step": 29
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.000170873786407767,
      "loss": 2.1914,
      "step": 30
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00016990291262135922,
      "loss": 2.3306,
      "step": 31
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00016893203883495145,
      "loss": 2.2901,
      "step": 32
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00016796116504854368,
      "loss": 2.1166,
      "step": 33
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00016699029126213594,
      "loss": 2.2927,
      "step": 34
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00016601941747572817,
      "loss": 2.2732,
      "step": 35
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0001650485436893204,
      "loss": 2.1614,
      "step": 36
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00016407766990291262,
      "loss": 2.1986,
      "step": 37
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00016310679611650485,
      "loss": 2.3506,
      "step": 38
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00016213592233009708,
      "loss": 2.2425,
      "step": 39
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.0001611650485436893,
      "loss": 2.2483,
      "step": 40
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00016019417475728157,
      "loss": 2.031,
      "step": 41
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0001592233009708738,
      "loss": 2.1587,
      "step": 42
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00015825242718446603,
      "loss": 2.1529,
      "step": 43
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00015728155339805825,
      "loss": 2.181,
      "step": 44
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00015631067961165048,
      "loss": 2.1168,
      "step": 45
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0001553398058252427,
      "loss": 2.2189,
      "step": 46
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00015436893203883497,
      "loss": 2.1362,
      "step": 47
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0001533980582524272,
      "loss": 2.0704,
      "step": 48
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00015242718446601943,
      "loss": 2.1273,
      "step": 49
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00015145631067961166,
      "loss": 2.1639,
      "step": 50
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00015048543689320389,
      "loss": 2.1639,
      "step": 51
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00014951456310679611,
      "loss": 2.0664,
      "step": 52
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00014854368932038834,
      "loss": 2.0539,
      "step": 53
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.0001475728155339806,
      "loss": 2.206,
      "step": 54
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00014660194174757283,
      "loss": 2.1366,
      "step": 55
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00014563106796116506,
      "loss": 2.1016,
      "step": 56
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.0001446601941747573,
      "loss": 2.1042,
      "step": 57
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00014368932038834952,
      "loss": 2.144,
      "step": 58
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.00014271844660194175,
      "loss": 2.0834,
      "step": 59
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.000141747572815534,
      "loss": 2.0255,
      "step": 60
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00014077669902912623,
      "loss": 2.131,
      "step": 61
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.00013980582524271846,
      "loss": 2.2428,
      "step": 62
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.0001388349514563107,
      "loss": 2.0831,
      "step": 63
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00013786407766990292,
      "loss": 2.1633,
      "step": 64
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00013689320388349515,
      "loss": 2.2224,
      "step": 65
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.0001359223300970874,
      "loss": 2.0999,
      "step": 66
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.00013495145631067963,
      "loss": 2.1749,
      "step": 67
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.00013398058252427186,
      "loss": 1.9726,
      "step": 68
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.0001330097087378641,
      "loss": 2.1678,
      "step": 69
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.00013203883495145632,
      "loss": 2.0646,
      "step": 70
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.00013106796116504855,
      "loss": 2.0049,
      "step": 71
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00013009708737864078,
      "loss": 2.0944,
      "step": 72
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.00012912621359223304,
      "loss": 2.2013,
      "step": 73
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.00012815533980582526,
      "loss": 2.311,
      "step": 74
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.0001271844660194175,
      "loss": 2.0863,
      "step": 75
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00012621359223300972,
      "loss": 2.2028,
      "step": 76
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00012524271844660195,
      "loss": 2.0283,
      "step": 77
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00012427184466019418,
      "loss": 2.2133,
      "step": 78
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.0001233009708737864,
      "loss": 2.1084,
      "step": 79
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.00012233009708737864,
      "loss": 1.967,
      "step": 80
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.00012135922330097087,
      "loss": 2.3109,
      "step": 81
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.0001203883495145631,
      "loss": 2.2248,
      "step": 82
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.00011941747572815534,
      "loss": 2.1178,
      "step": 83
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.00011844660194174757,
      "loss": 2.161,
      "step": 84
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.0001174757281553398,
      "loss": 2.0778,
      "step": 85
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.00011650485436893204,
      "loss": 2.2326,
      "step": 86
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.00011553398058252427,
      "loss": 2.0262,
      "step": 87
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.0001145631067961165,
      "loss": 2.076,
      "step": 88
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00011359223300970874,
      "loss": 2.0044,
      "step": 89
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.00011262135922330097,
      "loss": 2.0397,
      "step": 90
    },
    {
      "epoch": 0.88,
      "learning_rate": 0.0001116504854368932,
      "loss": 2.1135,
      "step": 91
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.00011067961165048544,
      "loss": 2.29,
      "step": 92
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.00010970873786407767,
      "loss": 1.949,
      "step": 93
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.0001087378640776699,
      "loss": 2.1177,
      "step": 94
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.00010776699029126213,
      "loss": 2.1405,
      "step": 95
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.00010679611650485437,
      "loss": 2.2089,
      "step": 96
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.0001058252427184466,
      "loss": 2.1267,
      "step": 97
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.00010485436893203883,
      "loss": 2.0522,
      "step": 98
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.00010388349514563107,
      "loss": 2.0273,
      "step": 99
    },
    {
      "epoch": 0.96,
      "learning_rate": 0.0001029126213592233,
      "loss": 2.1042,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 206,
  "num_train_epochs": 2,
  "save_steps": 100,
  "total_flos": 2.9528801422848e+16,
  "trial_name": null,
  "trial_params": null
}