jonatatyska's picture
Model save
e78d9d1 verified
{
"best_metric": 0.2590486407279968,
"best_model_checkpoint": "data/Qwen2.5-0.5B-Instruct-EmbodiedZero/checkpoint-100",
"epoch": 2.380952380952381,
"eval_steps": 10,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11904761904761904,
"grad_norm": 12.975658407394809,
"learning_rate": 3.3333333333333333e-06,
"loss": 1.5303,
"mean_token_accuracy": 0.666902494430542,
"step": 5
},
{
"epoch": 0.23809523809523808,
"grad_norm": 6.4717367297675255,
"learning_rate": 6.666666666666667e-06,
"loss": 1.3514,
"mean_token_accuracy": 0.685704255104065,
"step": 10
},
{
"epoch": 0.23809523809523808,
"eval_loss": 1.1966838836669922,
"eval_mean_token_accuracy": 0.7069141214544122,
"eval_runtime": 1.8082,
"eval_samples_per_second": 733.879,
"eval_steps_per_second": 6.083,
"step": 10
},
{
"epoch": 0.35714285714285715,
"grad_norm": 3.86162454528153,
"learning_rate": 1e-05,
"loss": 0.9688,
"mean_token_accuracy": 0.7506838798522949,
"step": 15
},
{
"epoch": 0.47619047619047616,
"grad_norm": 1.6645962651415034,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.5609,
"mean_token_accuracy": 0.8392774343490601,
"step": 20
},
{
"epoch": 0.47619047619047616,
"eval_loss": 0.44837433099746704,
"eval_mean_token_accuracy": 0.8653898780996149,
"eval_runtime": 1.8047,
"eval_samples_per_second": 735.283,
"eval_steps_per_second": 6.095,
"step": 20
},
{
"epoch": 0.5952380952380952,
"grad_norm": 0.9631809574633562,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.4285,
"mean_token_accuracy": 0.8694754362106323,
"step": 25
},
{
"epoch": 0.7142857142857143,
"grad_norm": 1.0392843877849576,
"learning_rate": 2e-05,
"loss": 0.3926,
"mean_token_accuracy": 0.8767556309700012,
"step": 30
},
{
"epoch": 0.7142857142857143,
"eval_loss": 0.36515992879867554,
"eval_mean_token_accuracy": 0.8830820647152987,
"eval_runtime": 1.8072,
"eval_samples_per_second": 734.287,
"eval_steps_per_second": 6.087,
"step": 30
},
{
"epoch": 0.8333333333333334,
"grad_norm": 0.8882592936724429,
"learning_rate": 2.3333333333333336e-05,
"loss": 0.3694,
"mean_token_accuracy": 0.8828507781028747,
"step": 35
},
{
"epoch": 0.9523809523809523,
"grad_norm": 0.9099622390748925,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.3369,
"mean_token_accuracy": 0.8917223811149597,
"step": 40
},
{
"epoch": 0.9523809523809523,
"eval_loss": 0.32486116886138916,
"eval_mean_token_accuracy": 0.8935280550609935,
"eval_runtime": 1.807,
"eval_samples_per_second": 734.354,
"eval_steps_per_second": 6.087,
"step": 40
},
{
"epoch": 1.0714285714285714,
"grad_norm": 0.7969308502776014,
"learning_rate": 3e-05,
"loss": 0.3127,
"mean_token_accuracy": 0.8970342874526978,
"step": 45
},
{
"epoch": 1.1904761904761905,
"grad_norm": 0.7625545705483939,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.304,
"mean_token_accuracy": 0.899208664894104,
"step": 50
},
{
"epoch": 1.1904761904761905,
"eval_loss": 0.30286020040512085,
"eval_mean_token_accuracy": 0.899386004968123,
"eval_runtime": 1.811,
"eval_samples_per_second": 732.754,
"eval_steps_per_second": 6.074,
"step": 50
},
{
"epoch": 1.3095238095238095,
"grad_norm": 0.8650271122549905,
"learning_rate": 3.6666666666666666e-05,
"loss": 0.3012,
"mean_token_accuracy": 0.9001616716384888,
"step": 55
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.9972393508554793,
"learning_rate": 4e-05,
"loss": 0.2859,
"mean_token_accuracy": 0.9037908315658569,
"step": 60
},
{
"epoch": 1.4285714285714286,
"eval_loss": 0.2923315465450287,
"eval_mean_token_accuracy": 0.902120438489047,
"eval_runtime": 1.8113,
"eval_samples_per_second": 732.621,
"eval_steps_per_second": 6.073,
"step": 60
},
{
"epoch": 1.5476190476190477,
"grad_norm": 1.0187892250022372,
"learning_rate": 4.3333333333333334e-05,
"loss": 0.2893,
"mean_token_accuracy": 0.9032210826873779,
"step": 65
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.9131472600640349,
"learning_rate": 4.666666666666667e-05,
"loss": 0.2784,
"mean_token_accuracy": 0.9060839653015137,
"step": 70
},
{
"epoch": 1.6666666666666665,
"eval_loss": 0.2805326282978058,
"eval_mean_token_accuracy": 0.9057970317927274,
"eval_runtime": 1.8157,
"eval_samples_per_second": 730.844,
"eval_steps_per_second": 6.058,
"step": 70
},
{
"epoch": 1.7857142857142856,
"grad_norm": 1.0125564206267341,
"learning_rate": 5e-05,
"loss": 0.2752,
"mean_token_accuracy": 0.9072439670562744,
"step": 75
},
{
"epoch": 1.9047619047619047,
"grad_norm": 0.9459831073632716,
"learning_rate": 4.9998633032622725e-05,
"loss": 0.2816,
"mean_token_accuracy": 0.9059006571769714,
"step": 80
},
{
"epoch": 1.9047619047619047,
"eval_loss": 0.27141886949539185,
"eval_mean_token_accuracy": 0.9087122028524225,
"eval_runtime": 1.8099,
"eval_samples_per_second": 733.176,
"eval_steps_per_second": 6.078,
"step": 80
},
{
"epoch": 2.0238095238095237,
"grad_norm": 0.8862940313394464,
"learning_rate": 4.999453229658863e-05,
"loss": 0.2685,
"mean_token_accuracy": 0.9087640166282653,
"step": 85
},
{
"epoch": 2.142857142857143,
"grad_norm": 0.8183599098342472,
"learning_rate": 4.998769829017084e-05,
"loss": 0.239,
"mean_token_accuracy": 0.9171680569648742,
"step": 90
},
{
"epoch": 2.142857142857143,
"eval_loss": 0.2653484642505646,
"eval_mean_token_accuracy": 0.9104260748082941,
"eval_runtime": 1.8106,
"eval_samples_per_second": 732.919,
"eval_steps_per_second": 6.075,
"step": 90
},
{
"epoch": 2.261904761904762,
"grad_norm": 0.7459883992110367,
"learning_rate": 4.9978131843757234e-05,
"loss": 0.2437,
"mean_token_accuracy": 0.9155828952789307,
"step": 95
},
{
"epoch": 2.380952380952381,
"grad_norm": 0.739863741164227,
"learning_rate": 4.9965834119749617e-05,
"loss": 0.2429,
"mean_token_accuracy": 0.916073453426361,
"step": 100
},
{
"epoch": 2.380952380952381,
"eval_loss": 0.2590486407279968,
"eval_mean_token_accuracy": 0.9125146703286604,
"eval_runtime": 1.8117,
"eval_samples_per_second": 732.46,
"eval_steps_per_second": 6.072,
"step": 100
},
{
"epoch": 2.380952380952381,
"step": 100,
"total_flos": 8792309760000.0,
"train_loss": 0.4630614173412323,
"train_runtime": 106.905,
"train_samples_per_second": 1795.987,
"train_steps_per_second": 14.031
}
],
"logging_steps": 5,
"max_steps": 1500,
"num_input_tokens_seen": 0,
"num_train_epochs": 36,
"save_steps": 20,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8792309760000.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}