{
"best_metric": 1.7200040817260742,
"best_model_checkpoint": "./results/cluster2_batch1_prop0.2/checkpoint-1500",
"epoch": 0.9999231616422918,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04098045744435622,
"grad_norm": 0.3399759829044342,
"learning_rate": 9.997020702755353e-05,
"loss": 1.9745,
"step": 100
},
{
"epoch": 0.08196091488871245,
"grad_norm": 1.2304991483688354,
"learning_rate": 9.930186708264901e-05,
"loss": 1.7722,
"step": 200
},
{
"epoch": 0.12294137233306866,
"grad_norm": 1.1636056900024414,
"learning_rate": 9.776557563346957e-05,
"loss": 1.7359,
"step": 300
},
{
"epoch": 0.1639218297774249,
"grad_norm": 1.0206753015518188,
"learning_rate": 9.538837884587511e-05,
"loss": 1.6943,
"step": 400
},
{
"epoch": 0.2049022872217811,
"grad_norm": 1.0695409774780273,
"learning_rate": 9.221212689004862e-05,
"loss": 1.6755,
"step": 500
},
{
"epoch": 0.2049022872217811,
"eval_loss": 1.7410061359405518,
"eval_runtime": 1240.4448,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 2.02,
"step": 500
},
{
"epoch": 0.24588274466613733,
"grad_norm": 1.3881298303604126,
"learning_rate": 8.82927371749271e-05,
"loss": 1.6707,
"step": 600
},
{
"epoch": 0.28686320211049354,
"grad_norm": 0.9833546876907349,
"learning_rate": 8.369920993113824e-05,
"loss": 1.6787,
"step": 700
},
{
"epoch": 0.3278436595548498,
"grad_norm": 0.9632484912872314,
"learning_rate": 7.851241347294876e-05,
"loss": 1.683,
"step": 800
},
{
"epoch": 0.36882411699920603,
"grad_norm": 0.9162977337837219,
"learning_rate": 7.28236605244935e-05,
"loss": 1.6609,
"step": 900
},
{
"epoch": 0.4098045744435622,
"grad_norm": 1.2075759172439575,
"learning_rate": 6.673310067383545e-05,
"loss": 1.6527,
"step": 1000
},
{
"epoch": 0.4098045744435622,
"eval_loss": 1.7281365394592285,
"eval_runtime": 1238.7549,
"eval_samples_per_second": 4.045,
"eval_steps_per_second": 2.023,
"step": 1000
},
{
"epoch": 0.45078503188791846,
"grad_norm": 1.0344264507293701,
"learning_rate": 6.034795725544571e-05,
"loss": 1.6614,
"step": 1100
},
{
"epoch": 0.49176548933227465,
"grad_norm": 1.024488925933838,
"learning_rate": 5.378063970050694e-05,
"loss": 1.6817,
"step": 1200
},
{
"epoch": 0.5327459467766309,
"grad_norm": 1.174185037612915,
"learning_rate": 4.7146764586811296e-05,
"loss": 1.6607,
"step": 1300
},
{
"epoch": 0.5737264042209871,
"grad_norm": 1.417396903038025,
"learning_rate": 4.056312022735417e-05,
"loss": 1.6499,
"step": 1400
},
{
"epoch": 0.6147068616653434,
"grad_norm": 0.985633909702301,
"learning_rate": 3.414561063071644e-05,
"loss": 1.6598,
"step": 1500
},
{
"epoch": 0.6147068616653434,
"eval_loss": 1.7200040817260742,
"eval_runtime": 1239.5137,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 2.022,
"step": 1500
},
{
"epoch": 0.6556873191096996,
"grad_norm": 1.1905289888381958,
"learning_rate": 2.8007215029485057e-05,
"loss": 1.6755,
"step": 1600
},
{
"epoch": 0.6966677765540558,
"grad_norm": 1.6823028326034546,
"learning_rate": 2.2255998898888165e-05,
"loss": 1.6654,
"step": 1700
},
{
"epoch": 0.7376482339984121,
"grad_norm": 1.2739040851593018,
"learning_rate": 1.6993211481344824e-05,
"loss": 1.6453,
"step": 1800
},
{
"epoch": 0.7786286914427683,
"grad_norm": 1.4436434507369995,
"learning_rate": 1.2311503309705629e-05,
"loss": 1.6359,
"step": 1900
},
{
"epoch": 0.8196091488871244,
"grad_norm": 1.336064100265503,
"learning_rate": 8.293295109403504e-06,
"loss": 1.665,
"step": 2000
},
{
"epoch": 0.8196091488871244,
"eval_loss": 1.7243653535842896,
"eval_runtime": 1238.6702,
"eval_samples_per_second": 4.045,
"eval_steps_per_second": 2.023,
"step": 2000
},
{
"epoch": 0.8605896063314806,
"grad_norm": 0.9651763439178467,
"learning_rate": 5.009326794732072e-06,
"loss": 1.6241,
"step": 2100
},
{
"epoch": 0.9015700637758369,
"grad_norm": 1.135158896446228,
"learning_rate": 2.5174121039404643e-06,
"loss": 1.6527,
"step": 2200
},
{
"epoch": 0.9425505212201931,
"grad_norm": 1.232653260231018,
"learning_rate": 8.614207975952082e-07,
"loss": 1.6381,
"step": 2300
},
{
"epoch": 0.9835309786645493,
"grad_norm": 0.941377580165863,
"learning_rate": 7.050633844443711e-08,
"loss": 1.6307,
"step": 2400
}
],
"logging_steps": 100,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.1491335851408384e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}