| { |
| "best_metric": 0.20053359866142273, |
| "best_model_checkpoint": "models/startup-team-phi-qlora/checkpoint-100", |
| "epoch": 2.937728937728938, |
| "eval_steps": 10, |
| "global_step": 102, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.029304029304029304, |
| "grad_norm": 0.14737844467163086, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 1.1363, |
| "mean_token_accuracy": 0.7657624632120132, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.14652014652014653, |
| "grad_norm": 0.18032211065292358, |
| "learning_rate": 0.0001666666666666667, |
| "loss": 1.1028, |
| "mean_token_accuracy": 0.771841392852366, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.29304029304029305, |
| "grad_norm": 0.2304220199584961, |
| "learning_rate": 0.00019914448613738106, |
| "loss": 1.0475, |
| "mean_token_accuracy": 0.7810361601412297, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.29304029304029305, |
| "eval_loss": 0.9339649081230164, |
| "eval_mean_token_accuracy": 0.7965992987155914, |
| "eval_runtime": 50.6795, |
| "eval_samples_per_second": 1.204, |
| "eval_steps_per_second": 0.158, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.43956043956043955, |
| "grad_norm": 0.30109331011772156, |
| "learning_rate": 0.0001956940335732209, |
| "loss": 0.8973, |
| "mean_token_accuracy": 0.8004154480993748, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.5860805860805861, |
| "grad_norm": 0.3704577684402466, |
| "learning_rate": 0.00018968727415326884, |
| "loss": 0.703, |
| "mean_token_accuracy": 0.8325344368815422, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.5860805860805861, |
| "eval_loss": 0.5746913552284241, |
| "eval_mean_token_accuracy": 0.8676818311214447, |
| "eval_runtime": 44.3992, |
| "eval_samples_per_second": 1.374, |
| "eval_steps_per_second": 0.18, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.7326007326007326, |
| "grad_norm": 0.3730122447013855, |
| "learning_rate": 0.00018128466845916154, |
| "loss": 0.5533, |
| "mean_token_accuracy": 0.8719696968793869, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.8791208791208791, |
| "grad_norm": 0.30677032470703125, |
| "learning_rate": 0.00017071067811865476, |
| "loss": 0.4128, |
| "mean_token_accuracy": 0.9108748823404312, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.8791208791208791, |
| "eval_loss": 0.32779568433761597, |
| "eval_mean_token_accuracy": 0.9334485232830048, |
| "eval_runtime": 49.8784, |
| "eval_samples_per_second": 1.223, |
| "eval_steps_per_second": 0.16, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5578669905662537, |
| "learning_rate": 0.00015824776968678024, |
| "loss": 0.335, |
| "mean_token_accuracy": 0.9305518567562103, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.1465201465201464, |
| "grad_norm": 0.2113618403673172, |
| "learning_rate": 0.00014422886902190014, |
| "loss": 0.306, |
| "mean_token_accuracy": 0.9388440825045109, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.1465201465201464, |
| "eval_loss": 0.2558988034725189, |
| "eval_mean_token_accuracy": 0.9488431662321091, |
| "eval_runtime": 42.7021, |
| "eval_samples_per_second": 1.428, |
| "eval_steps_per_second": 0.187, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.293040293040293, |
| "grad_norm": 0.16832281649112701, |
| "learning_rate": 0.00012902846772544624, |
| "loss": 0.2482, |
| "mean_token_accuracy": 0.9499588944017887, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.4395604395604396, |
| "grad_norm": 0.1472930908203125, |
| "learning_rate": 0.00011305261922200519, |
| "loss": 0.2318, |
| "mean_token_accuracy": 0.9537023350596427, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.4395604395604396, |
| "eval_loss": 0.22639355063438416, |
| "eval_mean_token_accuracy": 0.953661359846592, |
| "eval_runtime": 42.6645, |
| "eval_samples_per_second": 1.43, |
| "eval_steps_per_second": 0.188, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.5860805860805862, |
| "grad_norm": 0.16793935000896454, |
| "learning_rate": 9.67280917178224e-05, |
| "loss": 0.2364, |
| "mean_token_accuracy": 0.9529081016778946, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.7326007326007327, |
| "grad_norm": 0.15153780579566956, |
| "learning_rate": 8.049096779838719e-05, |
| "loss": 0.2431, |
| "mean_token_accuracy": 0.9506231568753719, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.7326007326007327, |
| "eval_loss": 0.2124478667974472, |
| "eval_mean_token_accuracy": 0.9563756883144379, |
| "eval_runtime": 42.4992, |
| "eval_samples_per_second": 1.435, |
| "eval_steps_per_second": 0.188, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.879120879120879, |
| "grad_norm": 0.1492297649383545, |
| "learning_rate": 6.477499520787665e-05, |
| "loss": 0.2357, |
| "mean_token_accuracy": 0.9522971525788307, |
| "step": 65 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.5314327478408813, |
| "learning_rate": 5.000000000000002e-05, |
| "loss": 0.2323, |
| "mean_token_accuracy": 0.9523700963367115, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.20574581623077393, |
| "eval_mean_token_accuracy": 0.9567286148667336, |
| "eval_runtime": 43.0255, |
| "eval_samples_per_second": 1.418, |
| "eval_steps_per_second": 0.186, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.1465201465201464, |
| "grad_norm": 0.15407854318618774, |
| "learning_rate": 3.6560671583635467e-05, |
| "loss": 0.2353, |
| "mean_token_accuracy": 0.9516373299062252, |
| "step": 75 |
| }, |
| { |
| "epoch": 2.293040293040293, |
| "grad_norm": 0.21263441443443298, |
| "learning_rate": 2.4816019252102273e-05, |
| "loss": 0.2195, |
| "mean_token_accuracy": 0.9543178603053093, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.293040293040293, |
| "eval_loss": 0.202470600605011, |
| "eval_mean_token_accuracy": 0.957635909318924, |
| "eval_runtime": 42.2023, |
| "eval_samples_per_second": 1.445, |
| "eval_steps_per_second": 0.19, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.4395604395604398, |
| "grad_norm": 0.21033723652362823, |
| "learning_rate": 1.5079781847342123e-05, |
| "loss": 0.2149, |
| "mean_token_accuracy": 0.9557917781174183, |
| "step": 85 |
| }, |
| { |
| "epoch": 2.586080586080586, |
| "grad_norm": 0.131508931517601, |
| "learning_rate": 7.612046748871327e-06, |
| "loss": 0.2072, |
| "mean_token_accuracy": 0.9576979361474514, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.586080586080586, |
| "eval_loss": 0.20078732073307037, |
| "eval_mean_token_accuracy": 0.9580192342400551, |
| "eval_runtime": 42.6779, |
| "eval_samples_per_second": 1.429, |
| "eval_steps_per_second": 0.187, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.7326007326007327, |
| "grad_norm": 0.1332683563232422, |
| "learning_rate": 2.612302072266637e-06, |
| "loss": 0.2166, |
| "mean_token_accuracy": 0.9556084908545017, |
| "step": 95 |
| }, |
| { |
| "epoch": 2.879120879120879, |
| "grad_norm": 0.13702718913555145, |
| "learning_rate": 2.141076761396521e-07, |
| "loss": 0.2034, |
| "mean_token_accuracy": 0.9584921665489674, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.879120879120879, |
| "eval_loss": 0.20053359866142273, |
| "eval_mean_token_accuracy": 0.9581777453422546, |
| "eval_runtime": 43.3904, |
| "eval_samples_per_second": 1.406, |
| "eval_steps_per_second": 0.184, |
| "step": 100 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 102, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 10, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.61256195259904e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|