{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9968,
  "eval_steps": 500,
  "global_step": 78,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": "0.0000e+00",
      "loss": 2.2666,
      "slid_loss": 2.2666,
      "step": 1,
      "time": 42.16
    },
    {
      "epoch": 0.05,
      "learning_rate": "5.0000e-06",
      "loss": 2.2601,
      "slid_loss": 2.2634,
      "step": 2,
      "time": 34.12
    },
    {
      "epoch": 0.08,
      "learning_rate": "5.0000e-06",
      "loss": 2.3071,
      "slid_loss": 2.2779,
      "step": 3,
      "time": 33.4
    },
    {
      "epoch": 0.1,
      "learning_rate": "5.0000e-06",
      "loss": 2.1847,
      "slid_loss": 2.2546,
      "step": 4,
      "time": 33.28
    },
    {
      "epoch": 0.13,
      "learning_rate": "5.0000e-06",
      "loss": 2.2277,
      "slid_loss": 2.2492,
      "step": 5,
      "time": 34.62
    },
    {
      "epoch": 0.15,
      "learning_rate": "5.0000e-06",
      "loss": 2.1922,
      "slid_loss": 2.2397,
      "step": 6,
      "time": 32.87
    },
    {
      "epoch": 0.18,
      "learning_rate": "5.0000e-06",
      "loss": 2.168,
      "slid_loss": 2.2295,
      "step": 7,
      "time": 33.59
    },
    {
      "epoch": 0.2,
      "learning_rate": "5.0000e-06",
      "loss": 2.2024,
      "slid_loss": 2.2261,
      "step": 8,
      "time": 33.64
    },
    {
      "epoch": 0.23,
      "learning_rate": "5.0000e-06",
      "loss": 2.1198,
      "slid_loss": 2.2143,
      "step": 9,
      "time": 35.32
    },
    {
      "epoch": 0.26,
      "learning_rate": "5.0000e-06",
      "loss": 2.139,
      "slid_loss": 2.2068,
      "step": 10,
      "time": 33.38
    },
    {
      "epoch": 0.28,
      "learning_rate": "5.0000e-06",
      "loss": 2.1052,
      "slid_loss": 2.1975,
      "step": 11,
      "time": 33.38
    },
    {
      "epoch": 0.31,
      "learning_rate": "5.0000e-06",
      "loss": 2.1561,
      "slid_loss": 2.1941,
      "step": 12,
      "time": 33.0
    },
    {
      "epoch": 0.33,
      "learning_rate": "5.0000e-06",
      "loss": 2.085,
      "slid_loss": 2.1857,
      "step": 13,
      "time": 32.73
    },
    {
      "epoch": 0.36,
      "learning_rate": "5.0000e-06",
      "loss": 2.1404,
      "slid_loss": 2.1824,
      "step": 14,
      "time": 33.91
    },
    {
      "epoch": 0.38,
      "learning_rate": "5.0000e-06",
      "loss": 2.0282,
      "slid_loss": 2.1722,
      "step": 15,
      "time": 32.97
    },
    {
      "epoch": 0.41,
      "learning_rate": "5.0000e-06",
      "loss": 2.0576,
      "slid_loss": 2.165,
      "step": 16,
      "time": 32.89
    },
    {
      "epoch": 0.44,
      "learning_rate": "5.0000e-06",
      "loss": 2.0584,
      "slid_loss": 2.1587,
      "step": 17,
      "time": 33.64
    },
    {
      "epoch": 0.46,
      "learning_rate": "5.0000e-06",
      "loss": 2.086,
      "slid_loss": 2.1547,
      "step": 18,
      "time": 35.21
    },
    {
      "epoch": 0.49,
      "learning_rate": "5.0000e-06",
      "loss": 2.0918,
      "slid_loss": 2.1514,
      "step": 19,
      "time": 33.29
    },
    {
      "epoch": 0.51,
      "learning_rate": "5.0000e-06",
      "loss": 2.0255,
      "slid_loss": 2.1451,
      "step": 20,
      "time": 33.69
    },
    {
      "epoch": 0.54,
      "learning_rate": "5.0000e-06",
      "loss": 2.0119,
      "slid_loss": 2.1387,
      "step": 21,
      "time": 33.5
    },
    {
      "epoch": 0.56,
      "learning_rate": "5.0000e-06",
      "loss": 1.9633,
      "slid_loss": 2.1308,
      "step": 22,
      "time": 35.21
    },
    {
      "epoch": 0.59,
      "learning_rate": "5.0000e-06",
      "loss": 2.0063,
      "slid_loss": 2.1254,
      "step": 23,
      "time": 32.96
    },
    {
      "epoch": 0.61,
      "learning_rate": "5.0000e-06",
      "loss": 2.0122,
      "slid_loss": 2.1206,
      "step": 24,
      "time": 33.34
    },
    {
      "epoch": 0.64,
      "learning_rate": "5.0000e-06",
      "loss": 1.9364,
      "slid_loss": 2.1133,
      "step": 25,
      "time": 33.35
    },
    {
      "epoch": 0.67,
      "learning_rate": "5.0000e-06",
      "loss": 1.9493,
      "slid_loss": 2.107,
      "step": 26,
      "time": 33.24
    },
    {
      "epoch": 0.69,
      "learning_rate": "5.0000e-06",
      "loss": 1.9124,
      "slid_loss": 2.0998,
      "step": 27,
      "time": 33.34
    },
    {
      "epoch": 0.72,
      "learning_rate": "5.0000e-06",
      "loss": 1.9077,
      "slid_loss": 2.0929,
      "step": 28,
      "time": 33.03
    },
    {
      "epoch": 0.74,
      "learning_rate": "5.0000e-06",
      "loss": 1.9838,
      "slid_loss": 2.0891,
      "step": 29,
      "time": 34.5
    },
    {
      "epoch": 0.77,
      "learning_rate": "5.0000e-06",
      "loss": 1.988,
      "slid_loss": 2.0858,
      "step": 30,
      "time": 33.39
    },
    {
      "epoch": 0.79,
      "learning_rate": "5.0000e-06",
      "loss": 1.9561,
      "slid_loss": 2.0816,
      "step": 31,
      "time": 33.25
    },
    {
      "epoch": 0.82,
      "learning_rate": "5.0000e-06",
      "loss": 1.8664,
      "slid_loss": 2.0749,
      "step": 32,
      "time": 32.75
    },
    {
      "epoch": 0.84,
      "learning_rate": "5.0000e-06",
      "loss": 1.8385,
      "slid_loss": 2.0677,
      "step": 33,
      "time": 33.61
    },
    {
      "epoch": 0.87,
      "learning_rate": "5.0000e-06",
      "loss": 1.8827,
      "slid_loss": 2.0623,
      "step": 34,
      "time": 33.48
    },
    {
      "epoch": 0.9,
      "learning_rate": "5.0000e-06",
      "loss": 1.8249,
      "slid_loss": 2.0555,
      "step": 35,
      "time": 33.62
    },
    {
      "epoch": 0.92,
      "learning_rate": "5.0000e-06",
      "loss": 1.8204,
      "slid_loss": 2.049,
      "step": 36,
      "time": 33.21
    },
    {
      "epoch": 0.95,
      "learning_rate": "5.0000e-06",
      "loss": 1.8761,
      "slid_loss": 2.0443,
      "step": 37,
      "time": 32.95
    },
    {
      "epoch": 0.97,
      "learning_rate": "5.0000e-06",
      "loss": 1.8621,
      "slid_loss": 2.0395,
      "step": 38,
      "time": 33.02
    },
    {
      "epoch": 1.0,
      "learning_rate": "5.0000e-06",
      "loss": 1.7632,
      "slid_loss": 2.0324,
      "step": 39,
      "time": 32.9
    },
    {
      "epoch": 1.02,
      "learning_rate": "5.0000e-06",
      "loss": 1.8407,
      "slid_loss": 2.0276,
      "step": 40,
      "time": 192.32
    },
    {
      "epoch": 1.05,
      "learning_rate": "5.0000e-06",
      "loss": 1.7514,
      "slid_loss": 2.0209,
      "step": 41,
      "time": 33.2
    },
    {
      "epoch": 1.08,
      "learning_rate": "5.0000e-06",
      "loss": 1.7342,
      "slid_loss": 2.014,
      "step": 42,
      "time": 33.32
    },
    {
      "epoch": 1.1,
      "learning_rate": "5.0000e-06",
      "loss": 1.7591,
      "slid_loss": 2.0081,
      "step": 43,
      "time": 32.94
    },
    {
      "epoch": 1.13,
      "learning_rate": "5.0000e-06",
      "loss": 1.7156,
      "slid_loss": 2.0015,
      "step": 44,
      "time": 32.85
    },
    {
      "epoch": 1.15,
      "learning_rate": "5.0000e-06",
      "loss": 1.7146,
      "slid_loss": 1.9951,
      "step": 45,
      "time": 32.84
    },
    {
      "epoch": 1.18,
      "learning_rate": "5.0000e-06",
      "loss": 1.7197,
      "slid_loss": 1.9891,
      "step": 46,
      "time": 32.83
    },
    {
      "epoch": 1.2,
      "learning_rate": "5.0000e-06",
      "loss": 1.6992,
      "slid_loss": 1.9829,
      "step": 47,
      "time": 33.24
    },
    {
      "epoch": 1.23,
      "learning_rate": "5.0000e-06",
      "loss": 1.7154,
      "slid_loss": 1.9774,
      "step": 48,
      "time": 34.15
    },
    {
      "epoch": 1.25,
      "learning_rate": "5.0000e-06",
      "loss": 1.6725,
      "slid_loss": 1.9711,
      "step": 49,
      "time": 35.49
    },
    {
      "epoch": 1.28,
      "learning_rate": "5.0000e-06",
      "loss": 1.6221,
      "slid_loss": 1.9642,
      "step": 50,
      "time": 33.02
    },
    {
      "epoch": 1.31,
      "learning_rate": "5.0000e-06",
      "loss": 1.656,
      "slid_loss": 1.9581,
      "step": 51,
      "time": 33.54
    },
    {
      "epoch": 1.33,
      "learning_rate": "5.0000e-06",
      "loss": 1.6232,
      "slid_loss": 1.9517,
      "step": 52,
      "time": 33.15
    },
    {
      "epoch": 1.36,
      "learning_rate": "5.0000e-06",
      "loss": 1.6363,
      "slid_loss": 1.9457,
      "step": 53,
      "time": 33.17
    },
    {
      "epoch": 1.38,
      "learning_rate": "5.0000e-06",
      "loss": 1.6079,
      "slid_loss": 1.9395,
      "step": 54,
      "time": 32.8
    },
    {
      "epoch": 1.41,
      "learning_rate": "5.0000e-06",
      "loss": 1.5803,
      "slid_loss": 1.9329,
      "step": 55,
      "time": 33.72
    },
    {
      "epoch": 1.43,
      "learning_rate": "5.0000e-06",
      "loss": 1.5249,
      "slid_loss": 1.9257,
      "step": 56,
      "time": 33.48
    },
    {
      "epoch": 1.46,
      "learning_rate": "5.0000e-06",
      "loss": 1.624,
      "slid_loss": 1.9204,
      "step": 57,
      "time": 33.19
    },
    {
      "epoch": 1.48,
      "learning_rate": "5.0000e-06",
      "loss": 1.5509,
      "slid_loss": 1.914,
      "step": 58,
      "time": 32.7
    },
    {
      "epoch": 1.51,
      "learning_rate": "5.0000e-06",
      "loss": 1.5339,
      "slid_loss": 1.9076,
      "step": 59,
      "time": 34.98
    },
    {
      "epoch": 1.54,
      "learning_rate": "5.0000e-06",
      "loss": 1.559,
      "slid_loss": 1.9017,
      "step": 60,
      "time": 33.29
    },
    {
      "epoch": 1.56,
      "learning_rate": "5.0000e-06",
      "loss": 1.4958,
      "slid_loss": 1.8951,
      "step": 61,
      "time": 32.61
    },
    {
      "epoch": 1.59,
      "learning_rate": "5.0000e-06",
      "loss": 1.4871,
      "slid_loss": 1.8885,
      "step": 62,
      "time": 33.46
    },
    {
      "epoch": 1.61,
      "learning_rate": "5.0000e-06",
      "loss": 1.4523,
      "slid_loss": 1.8816,
      "step": 63,
      "time": 32.93
    },
    {
      "epoch": 1.64,
      "learning_rate": "5.0000e-06",
      "loss": 1.4786,
      "slid_loss": 1.8753,
      "step": 64,
      "time": 33.78
    },
    {
      "epoch": 1.66,
      "learning_rate": "5.0000e-06",
      "loss": 1.4455,
      "slid_loss": 1.8687,
      "step": 65,
      "time": 32.82
    },
    {
      "epoch": 1.69,
      "learning_rate": "5.0000e-06",
      "loss": 1.4159,
      "slid_loss": 1.8618,
      "step": 66,
      "time": 34.87
    },
    {
      "epoch": 1.72,
      "learning_rate": "5.0000e-06",
      "loss": 1.3869,
      "slid_loss": 1.8547,
      "step": 67,
      "time": 33.06
    },
    {
      "epoch": 1.74,
      "learning_rate": "5.0000e-06",
      "loss": 1.3814,
      "slid_loss": 1.8478,
      "step": 68,
      "time": 34.85
    },
    {
      "epoch": 1.77,
      "learning_rate": "5.0000e-06",
      "loss": 1.3668,
      "slid_loss": 1.8408,
      "step": 69,
      "time": 33.18
    },
    {
      "epoch": 1.79,
      "learning_rate": "5.0000e-06",
      "loss": 1.4419,
      "slid_loss": 1.8351,
      "step": 70,
      "time": 34.61
    },
    {
      "epoch": 1.82,
      "learning_rate": "5.0000e-06",
      "loss": 1.3532,
      "slid_loss": 1.8283,
      "step": 71,
      "time": 33.92
    },
    {
      "epoch": 1.84,
      "learning_rate": "5.0000e-06",
      "loss": 1.343,
      "slid_loss": 1.8216,
      "step": 72,
      "time": 32.6
    },
    {
      "epoch": 1.87,
      "learning_rate": "5.0000e-06",
      "loss": 1.3843,
      "slid_loss": 1.8156,
      "step": 73,
      "time": 32.92
    },
    {
      "epoch": 1.89,
      "learning_rate": "5.0000e-06",
      "loss": 1.3455,
      "slid_loss": 1.8092,
      "step": 74,
      "time": 33.47
    },
    {
      "epoch": 1.92,
      "learning_rate": "5.0000e-06",
      "loss": 1.3042,
      "slid_loss": 1.8025,
      "step": 75,
      "time": 33.54
    },
    {
      "epoch": 1.95,
      "learning_rate": "5.0000e-06",
      "loss": 1.347,
      "slid_loss": 1.7965,
      "step": 76,
      "time": 33.22
    },
    {
      "epoch": 1.97,
      "learning_rate": "5.0000e-06",
      "loss": 1.237,
      "slid_loss": 1.7892,
      "step": 77,
      "time": 33.25
    },
    {
      "epoch": 2.0,
      "learning_rate": "5.0000e-06",
      "loss": 1.1854,
      "slid_loss": 1.7815,
      "step": 78,
      "time": 33.47
    },
    {
      "epoch": 2.0,
      "step": 78,
      "time": 167.03,
      "total_flos": 0.0,
      "train_loss": 1.781490119603964,
      "train_runtime": 2945.5278,
      "train_samples_per_second": 6.79,
      "train_steps_per_second": 0.026
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 78,
  "num_train_epochs": 2,
  "save_steps": 500,
  "total_flos": 0.0,
  "trial_name": null,
  "trial_params": null
}