| { |
| "best_metric": 1.2364587783813477, |
| "best_model_checkpoint": "./qlora-out/checkpoint-150", |
| "epoch": 0.04325493376588267, |
| "eval_steps": 5, |
| "global_step": 160, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 2e-05, |
| "loss": 2.0394, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4e-05, |
| "loss": 1.8358, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 6e-05, |
| "loss": 1.9194, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 8e-05, |
| "loss": 1.5998, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.0001, |
| "loss": 2.3201, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0, |
| "eval_loss": 1.9676190614700317, |
| "eval_runtime": 16.4542, |
| "eval_samples_per_second": 4.558, |
| "eval_steps_per_second": 4.558, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.00012, |
| "loss": 1.7955, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.00014, |
| "loss": 1.9124, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.00016, |
| "loss": 1.8056, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.00018, |
| "loss": 1.6452, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.0002, |
| "loss": 2.2557, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0, |
| "eval_loss": 1.739979863166809, |
| "eval_runtime": 16.9804, |
| "eval_samples_per_second": 4.417, |
| "eval_steps_per_second": 4.417, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.00019999999598540582, |
| "loss": 1.5087, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.00019999998394162357, |
| "loss": 1.785, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.00019999996386865424, |
| "loss": 1.5459, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.0001999999357664994, |
| "loss": 1.8027, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.00019999989963516136, |
| "loss": 1.571, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0, |
| "eval_loss": 1.6035555601119995, |
| "eval_runtime": 17.543, |
| "eval_samples_per_second": 4.275, |
| "eval_steps_per_second": 4.275, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.000199999855474643, |
| "loss": 1.2283, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.00019999980328494788, |
| "loss": 1.2223, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.00019999974306608012, |
| "loss": 1.8525, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999967481804467, |
| "loss": 1.3687, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999959854084692, |
| "loss": 1.3875, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_loss": 1.5073901414871216, |
| "eval_runtime": 18.6107, |
| "eval_samples_per_second": 4.03, |
| "eval_steps_per_second": 4.03, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999951423449303, |
| "loss": 1.3078, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999942189898975, |
| "loss": 1.4264, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999932153434453, |
| "loss": 1.8519, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999921314056539, |
| "loss": 1.1411, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999909671766103, |
| "loss": 1.5436, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_loss": 1.4679571390151978, |
| "eval_runtime": 19.229, |
| "eval_samples_per_second": 3.9, |
| "eval_steps_per_second": 3.9, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999897226564083, |
| "loss": 1.2738, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.0001999988397845148, |
| "loss": 1.1203, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999869927429353, |
| "loss": 1.1895, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.0001999985507349883, |
| "loss": 1.3932, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999839416661106, |
| "loss": 1.4257, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_loss": 1.422224998474121, |
| "eval_runtime": 18.4406, |
| "eval_samples_per_second": 4.067, |
| "eval_steps_per_second": 4.067, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999822956917439, |
| "loss": 1.1026, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999805694269145, |
| "loss": 1.337, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.0001999978762871762, |
| "loss": 1.2867, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999768760264303, |
| "loss": 1.4109, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999749088910718, |
| "loss": 1.097, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_loss": 1.4048928022384644, |
| "eval_runtime": 18.7186, |
| "eval_samples_per_second": 4.007, |
| "eval_steps_per_second": 4.007, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.0001999972861465844, |
| "loss": 1.11, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999707337509115, |
| "loss": 0.9963, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999685257464446, |
| "loss": 1.2733, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999662374526215, |
| "loss": 1.6809, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999638688696254, |
| "loss": 1.4748, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_loss": 1.383140206336975, |
| "eval_runtime": 18.9843, |
| "eval_samples_per_second": 3.951, |
| "eval_steps_per_second": 3.951, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999614199976465, |
| "loss": 1.3495, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999588908368817, |
| "loss": 0.6009, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999562813875334, |
| "loss": 1.2478, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999535916498116, |
| "loss": 1.525, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999508216239322, |
| "loss": 1.1752, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_loss": 1.378393292427063, |
| "eval_runtime": 18.7075, |
| "eval_samples_per_second": 4.009, |
| "eval_steps_per_second": 4.009, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.0001999947971310118, |
| "loss": 1.4202, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.0001999945040708597, |
| "loss": 1.2551, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999420298196055, |
| "loss": 1.3672, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999389386433843, |
| "loss": 1.4148, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999357671801818, |
| "loss": 1.3857, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_loss": 1.3779667615890503, |
| "eval_runtime": 18.7662, |
| "eval_samples_per_second": 3.997, |
| "eval_steps_per_second": 3.997, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999325154302535, |
| "loss": 1.8206, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999291833938597, |
| "loss": 1.248, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.0001999925771071268, |
| "loss": 1.2879, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999222784627523, |
| "loss": 1.298, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.00019999187055685935, |
| "loss": 1.5965, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_loss": 1.3631337881088257, |
| "eval_runtime": 18.8459, |
| "eval_samples_per_second": 3.98, |
| "eval_steps_per_second": 3.98, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0001999915052389078, |
| "loss": 1.3097, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019999113189244993, |
| "loss": 1.5143, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019999075051751572, |
| "loss": 1.4421, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0001999903611141358, |
| "loss": 1.2693, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019998996368234142, |
| "loss": 1.071, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_loss": 1.3275150060653687, |
| "eval_runtime": 18.8306, |
| "eval_samples_per_second": 3.983, |
| "eval_steps_per_second": 3.983, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019998955822216448, |
| "loss": 1.4956, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019998914473363754, |
| "loss": 1.2852, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019998872321679386, |
| "loss": 1.1603, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019998829367166718, |
| "loss": 1.3886, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019998785609829205, |
| "loss": 1.197, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_loss": 1.3170785903930664, |
| "eval_runtime": 18.9971, |
| "eval_samples_per_second": 3.948, |
| "eval_steps_per_second": 3.948, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019998741049670363, |
| "loss": 1.3182, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019998695686693766, |
| "loss": 1.0751, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019998649520903055, |
| "loss": 1.2292, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019998602552301936, |
| "loss": 1.4889, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019998554780894186, |
| "loss": 1.8883, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_loss": 1.3108329772949219, |
| "eval_runtime": 18.8686, |
| "eval_samples_per_second": 3.975, |
| "eval_steps_per_second": 3.975, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0001999850620668364, |
| "loss": 1.5412, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0001999845682967419, |
| "loss": 1.4129, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019998406649869807, |
| "loss": 1.1464, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0001999835566727452, |
| "loss": 1.3642, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019998303881892424, |
| "loss": 1.0119, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_loss": 1.305336356163025, |
| "eval_runtime": 18.6567, |
| "eval_samples_per_second": 4.02, |
| "eval_steps_per_second": 4.02, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019998251293727672, |
| "loss": 1.2123, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0001999819790278449, |
| "loss": 1.1538, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0001999814370906716, |
| "loss": 1.292, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0001999808871258004, |
| "loss": 1.4582, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019998032913327545, |
| "loss": 1.399, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_loss": 1.3035436868667603, |
| "eval_runtime": 18.936, |
| "eval_samples_per_second": 3.961, |
| "eval_steps_per_second": 3.961, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019997976311314148, |
| "loss": 1.2134, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019997918906544402, |
| "loss": 1.3052, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019997860699022915, |
| "loss": 1.2369, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019997801688754354, |
| "loss": 1.4202, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019997741875743463, |
| "loss": 1.5369, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_loss": 1.3028186559677124, |
| "eval_runtime": 18.8349, |
| "eval_samples_per_second": 3.982, |
| "eval_steps_per_second": 3.982, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019997681259995045, |
| "loss": 1.1764, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019997619841513964, |
| "loss": 1.1738, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019997557620305152, |
| "loss": 1.0718, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019997494596373603, |
| "loss": 0.9415, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019997430769724383, |
| "loss": 1.5707, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_loss": 1.2982343435287476, |
| "eval_runtime": 18.86, |
| "eval_samples_per_second": 3.977, |
| "eval_steps_per_second": 3.977, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0001999736614036261, |
| "loss": 1.1557, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019997300708293482, |
| "loss": 1.5742, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.0001999723447352224, |
| "loss": 1.1634, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019997167436054217, |
| "loss": 1.1777, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019997099595894783, |
| "loss": 1.0073, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_loss": 1.3020063638687134, |
| "eval_runtime": 18.7946, |
| "eval_samples_per_second": 3.99, |
| "eval_steps_per_second": 3.99, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.0001999703095304939, |
| "loss": 1.1973, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.0001999696150752355, |
| "loss": 0.9865, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019996891259322838, |
| "loss": 1.397, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019996820208452895, |
| "loss": 1.789, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019996748354919426, |
| "loss": 1.1018, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_loss": 1.2908077239990234, |
| "eval_runtime": 18.8592, |
| "eval_samples_per_second": 3.977, |
| "eval_steps_per_second": 3.977, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019996675698728198, |
| "loss": 0.9187, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019996602239885048, |
| "loss": 1.0549, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019996527978395872, |
| "loss": 0.9875, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019996452914266633, |
| "loss": 1.1548, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.0001999637704750336, |
| "loss": 1.2036, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_loss": 1.2741996049880981, |
| "eval_runtime": 18.8746, |
| "eval_samples_per_second": 3.974, |
| "eval_steps_per_second": 3.974, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019996300378112138, |
| "loss": 1.0375, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019996222906099132, |
| "loss": 0.948, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019996144631470554, |
| "loss": 1.2329, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019996065554232697, |
| "loss": 1.1863, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019995985674391904, |
| "loss": 1.2444, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_loss": 1.2966747283935547, |
| "eval_runtime": 18.7891, |
| "eval_samples_per_second": 3.992, |
| "eval_steps_per_second": 3.992, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019995904991954591, |
| "loss": 1.4494, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019995823506927237, |
| "loss": 1.246, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.0001999574121931638, |
| "loss": 0.6458, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019995658129128634, |
| "loss": 0.8595, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019995574236370667, |
| "loss": 0.6345, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_loss": 1.2656028270721436, |
| "eval_runtime": 18.7533, |
| "eval_samples_per_second": 3.999, |
| "eval_steps_per_second": 3.999, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019995489541049213, |
| "loss": 1.5478, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019995404043171073, |
| "loss": 1.3418, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019995317742743115, |
| "loss": 1.0865, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019995230639772266, |
| "loss": 1.4096, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.0001999514273426552, |
| "loss": 1.0579, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_loss": 1.2640577554702759, |
| "eval_runtime": 18.8481, |
| "eval_samples_per_second": 3.979, |
| "eval_steps_per_second": 3.979, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019995054026229934, |
| "loss": 1.1476, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019994964515672635, |
| "loss": 1.2489, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019994874202600802, |
| "loss": 1.202, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019994783087021694, |
| "loss": 1.1668, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019994691168942624, |
| "loss": 1.2388, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_loss": 1.254260778427124, |
| "eval_runtime": 18.8299, |
| "eval_samples_per_second": 3.983, |
| "eval_steps_per_second": 3.983, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019994598448370968, |
| "loss": 1.2471, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019994504925314178, |
| "loss": 1.3326, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019994410599779758, |
| "loss": 0.8072, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019994315471775286, |
| "loss": 1.3839, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019994219541308395, |
| "loss": 1.0607, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_loss": 1.2552071809768677, |
| "eval_runtime": 18.8237, |
| "eval_samples_per_second": 3.984, |
| "eval_steps_per_second": 3.984, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0001999412280838679, |
| "loss": 1.125, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0001999402527301824, |
| "loss": 1.5856, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019993926935210575, |
| "loss": 1.0919, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019993827794971685, |
| "loss": 1.077, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0001999372785230954, |
| "loss": 1.0723, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_loss": 1.2610790729522705, |
| "eval_runtime": 18.7486, |
| "eval_samples_per_second": 4.0, |
| "eval_steps_per_second": 4.0, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019993627107232155, |
| "loss": 1.2645, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019993525559747622, |
| "loss": 1.0603, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019993423209864103, |
| "loss": 1.5076, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019993320057589805, |
| "loss": 0.7648, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019993216102933014, |
| "loss": 1.3775, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_loss": 1.2581839561462402, |
| "eval_runtime": 18.8104, |
| "eval_samples_per_second": 3.987, |
| "eval_steps_per_second": 3.987, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019993111345902078, |
| "loss": 1.6712, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019993005786505405, |
| "loss": 0.9862, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019992899424751474, |
| "loss": 0.834, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019992792260648825, |
| "loss": 0.9228, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019992684294206056, |
| "loss": 1.1543, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_loss": 1.2526917457580566, |
| "eval_runtime": 18.8068, |
| "eval_samples_per_second": 3.988, |
| "eval_steps_per_second": 3.988, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019992575525431847, |
| "loss": 1.1265, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019992465954334922, |
| "loss": 1.4227, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0001999235558092408, |
| "loss": 1.1107, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0001999224440520819, |
| "loss": 1.9652, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019992132427196167, |
| "loss": 1.1324, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_loss": 1.2364587783813477, |
| "eval_runtime": 18.8757, |
| "eval_samples_per_second": 3.973, |
| "eval_steps_per_second": 3.973, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0001999201964689701, |
| "loss": 1.0867, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019991906064319777, |
| "loss": 1.1107, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019991791679473578, |
| "loss": 1.1554, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019991676492367604, |
| "loss": 0.6891, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019991560503011103, |
| "loss": 0.7309, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_loss": 1.2444915771484375, |
| "eval_runtime": 18.7995, |
| "eval_samples_per_second": 3.989, |
| "eval_steps_per_second": 3.989, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019991443711413385, |
| "loss": 0.8146, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0001999132611758383, |
| "loss": 1.259, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0001999120772153188, |
| "loss": 0.8937, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019991088523267043, |
| "loss": 1.1238, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019990968522798887, |
| "loss": 1.3585, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_loss": 1.2490602731704712, |
| "eval_runtime": 18.834, |
| "eval_samples_per_second": 3.982, |
| "eval_steps_per_second": 3.982, |
| "step": 160 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 11097, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 10, |
| "total_flos": 488265068052480.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|