LLaMA-Omni-ASR-TTS / trainer_state.json
riyadhrazzaq's picture
Upload folder using huggingface_hub
b25d7b1 verified
{
"best_metric": 0.5067562460899353,
"best_model_checkpoint": "/scratch/mriyadh/llama_omni_asr_tts/exp/omni_stage_two_full/checkpoint-300",
"epoch": 59.171597633136095,
"eval_steps": 300,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.8875739644970414,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.6641,
"eval_samples_per_second": 36.821,
"eval_steps_per_second": 2.318,
"step": 300
},
{
"epoch": 1.4792899408284024,
"grad_norm": 86.5,
"learning_rate": 0.0001666666666666667,
"loss": 11.072,
"step": 500
},
{
"epoch": 1.7751479289940828,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8455,
"eval_samples_per_second": 36.64,
"eval_steps_per_second": 2.307,
"step": 600
},
{
"epoch": 2.662721893491124,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.7706,
"eval_samples_per_second": 36.714,
"eval_steps_per_second": 2.312,
"step": 900
},
{
"epoch": 2.9585798816568047,
"grad_norm": 32.75,
"learning_rate": 0.00019979028262377118,
"loss": 5.0622,
"step": 1000
},
{
"epoch": 3.5502958579881656,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.9679,
"eval_samples_per_second": 36.518,
"eval_steps_per_second": 2.299,
"step": 1200
},
{
"epoch": 4.437869822485207,
"grad_norm": 31.0,
"learning_rate": 0.00019893981312363562,
"loss": 3.234,
"step": 1500
},
{
"epoch": 4.437869822485207,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.907,
"eval_samples_per_second": 36.578,
"eval_steps_per_second": 2.303,
"step": 1500
},
{
"epoch": 5.325443786982248,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8765,
"eval_samples_per_second": 36.609,
"eval_steps_per_second": 2.305,
"step": 1800
},
{
"epoch": 5.9171597633136095,
"grad_norm": 6.96875,
"learning_rate": 0.00019744105246469263,
"loss": 2.5031,
"step": 2000
},
{
"epoch": 6.21301775147929,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.7905,
"eval_samples_per_second": 36.694,
"eval_steps_per_second": 2.31,
"step": 2100
},
{
"epoch": 7.100591715976331,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8163,
"eval_samples_per_second": 36.669,
"eval_steps_per_second": 2.309,
"step": 2400
},
{
"epoch": 7.396449704142012,
"grad_norm": 6.5625,
"learning_rate": 0.0001953038210948861,
"loss": 2.0242,
"step": 2500
},
{
"epoch": 7.988165680473373,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.9078,
"eval_samples_per_second": 36.578,
"eval_steps_per_second": 2.303,
"step": 2700
},
{
"epoch": 8.875739644970414,
"grad_norm": 10.875,
"learning_rate": 0.00019254212296427044,
"loss": 1.7989,
"step": 3000
},
{
"epoch": 8.875739644970414,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.9591,
"eval_samples_per_second": 36.527,
"eval_steps_per_second": 2.3,
"step": 3000
},
{
"epoch": 9.763313609467456,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8178,
"eval_samples_per_second": 36.667,
"eval_steps_per_second": 2.309,
"step": 3300
},
{
"epoch": 10.355029585798816,
"grad_norm": 9.375,
"learning_rate": 0.00018917405376582145,
"loss": 1.5521,
"step": 3500
},
{
"epoch": 10.650887573964496,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.5472,
"eval_samples_per_second": 36.939,
"eval_steps_per_second": 2.326,
"step": 3600
},
{
"epoch": 11.538461538461538,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.7909,
"eval_samples_per_second": 36.694,
"eval_steps_per_second": 2.31,
"step": 3900
},
{
"epoch": 11.834319526627219,
"grad_norm": 5.875,
"learning_rate": 0.00018522168236559695,
"loss": 1.3787,
"step": 4000
},
{
"epoch": 12.42603550295858,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.9259,
"eval_samples_per_second": 36.56,
"eval_steps_per_second": 2.302,
"step": 4200
},
{
"epoch": 13.31360946745562,
"grad_norm": 4.5625,
"learning_rate": 0.00018071090619916093,
"loss": 1.2505,
"step": 4500
},
{
"epoch": 13.31360946745562,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8178,
"eval_samples_per_second": 36.667,
"eval_steps_per_second": 2.309,
"step": 4500
},
{
"epoch": 14.201183431952662,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8898,
"eval_samples_per_second": 36.596,
"eval_steps_per_second": 2.304,
"step": 4800
},
{
"epoch": 14.792899408284024,
"grad_norm": 3.921875,
"learning_rate": 0.00017567128158176953,
"loss": 1.1568,
"step": 5000
},
{
"epoch": 15.088757396449704,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.6043,
"eval_samples_per_second": 36.881,
"eval_steps_per_second": 2.322,
"step": 5100
},
{
"epoch": 15.976331360946746,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8561,
"eval_samples_per_second": 36.629,
"eval_steps_per_second": 2.306,
"step": 5400
},
{
"epoch": 16.272189349112427,
"grad_norm": 2.984375,
"learning_rate": 0.00017013583004418993,
"loss": 1.0789,
"step": 5500
},
{
"epoch": 16.86390532544379,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.913,
"eval_samples_per_second": 36.572,
"eval_steps_per_second": 2.303,
"step": 5700
},
{
"epoch": 17.75147928994083,
"grad_norm": 3.53125,
"learning_rate": 0.000164140821963114,
"loss": 0.9769,
"step": 6000
},
{
"epoch": 17.75147928994083,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.802,
"eval_samples_per_second": 36.683,
"eval_steps_per_second": 2.31,
"step": 6000
},
{
"epoch": 18.63905325443787,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.9543,
"eval_samples_per_second": 36.532,
"eval_steps_per_second": 2.3,
"step": 6300
},
{
"epoch": 19.23076923076923,
"grad_norm": 3.046875,
"learning_rate": 0.00015772553890390197,
"loss": 0.913,
"step": 6500
},
{
"epoch": 19.526627218934912,
"eval_loss": 0.5067562460899353,
"eval_runtime": 37.0561,
"eval_samples_per_second": 36.431,
"eval_steps_per_second": 2.294,
"step": 6600
},
{
"epoch": 20.414201183431953,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.9,
"eval_samples_per_second": 36.585,
"eval_steps_per_second": 2.304,
"step": 6900
},
{
"epoch": 20.71005917159763,
"grad_norm": 3.1875,
"learning_rate": 0.00015093201623287631,
"loss": 0.8395,
"step": 7000
},
{
"epoch": 21.301775147928993,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8939,
"eval_samples_per_second": 36.591,
"eval_steps_per_second": 2.304,
"step": 7200
},
{
"epoch": 22.189349112426036,
"grad_norm": 2.765625,
"learning_rate": 0.00014380476768566824,
"loss": 0.7613,
"step": 7500
},
{
"epoch": 22.189349112426036,
"eval_loss": 0.5067562460899353,
"eval_runtime": 37.0429,
"eval_samples_per_second": 36.444,
"eval_steps_per_second": 2.295,
"step": 7500
},
{
"epoch": 23.076923076923077,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8494,
"eval_samples_per_second": 36.636,
"eval_steps_per_second": 2.307,
"step": 7800
},
{
"epoch": 23.668639053254438,
"grad_norm": 3.359375,
"learning_rate": 0.00013639049369634876,
"loss": 0.7133,
"step": 8000
},
{
"epoch": 23.964497041420117,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.6719,
"eval_samples_per_second": 36.813,
"eval_steps_per_second": 2.318,
"step": 8100
},
{
"epoch": 24.85207100591716,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8698,
"eval_samples_per_second": 36.615,
"eval_steps_per_second": 2.305,
"step": 8400
},
{
"epoch": 25.14792899408284,
"grad_norm": 4.1875,
"learning_rate": 0.00012873777539848283,
"loss": 0.6546,
"step": 8500
},
{
"epoch": 25.7396449704142,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.693,
"eval_samples_per_second": 36.792,
"eval_steps_per_second": 2.317,
"step": 8700
},
{
"epoch": 26.62721893491124,
"grad_norm": 2.40625,
"learning_rate": 0.00012089675630312754,
"loss": 0.5857,
"step": 9000
},
{
"epoch": 26.62721893491124,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.5163,
"eval_samples_per_second": 36.97,
"eval_steps_per_second": 2.328,
"step": 9000
},
{
"epoch": 27.514792899408285,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.9675,
"eval_samples_per_second": 36.519,
"eval_steps_per_second": 2.299,
"step": 9300
},
{
"epoch": 28.106508875739646,
"grad_norm": 2.265625,
"learning_rate": 0.00011291881373954065,
"loss": 0.5462,
"step": 9500
},
{
"epoch": 28.402366863905325,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8575,
"eval_samples_per_second": 36.628,
"eval_steps_per_second": 2.306,
"step": 9600
},
{
"epoch": 29.28994082840237,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.7695,
"eval_samples_per_second": 36.715,
"eval_steps_per_second": 2.312,
"step": 9900
},
{
"epoch": 29.585798816568047,
"grad_norm": 1.921875,
"learning_rate": 0.00010485622221144484,
"loss": 0.4881,
"step": 10000
},
{
"epoch": 30.17751479289941,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8382,
"eval_samples_per_second": 36.647,
"eval_steps_per_second": 2.307,
"step": 10200
},
{
"epoch": 31.06508875739645,
"grad_norm": 2.140625,
"learning_rate": 9.676181087466444e-05,
"loss": 0.4573,
"step": 10500
},
{
"epoch": 31.06508875739645,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8178,
"eval_samples_per_second": 36.667,
"eval_steps_per_second": 2.309,
"step": 10500
},
{
"epoch": 31.952662721893493,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8459,
"eval_samples_per_second": 36.639,
"eval_steps_per_second": 2.307,
"step": 10800
},
{
"epoch": 32.544378698224854,
"grad_norm": 1.953125,
"learning_rate": 8.868861738047158e-05,
"loss": 0.4072,
"step": 11000
},
{
"epoch": 32.84023668639053,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.9905,
"eval_samples_per_second": 36.496,
"eval_steps_per_second": 2.298,
"step": 11100
},
{
"epoch": 33.72781065088758,
"eval_loss": 0.5067562460899353,
"eval_runtime": 37.2072,
"eval_samples_per_second": 36.283,
"eval_steps_per_second": 2.285,
"step": 11400
},
{
"epoch": 34.023668639053255,
"grad_norm": 2.1875,
"learning_rate": 8.068954035279121e-05,
"loss": 0.3735,
"step": 11500
},
{
"epoch": 34.61538461538461,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8893,
"eval_samples_per_second": 36.596,
"eval_steps_per_second": 2.304,
"step": 11700
},
{
"epoch": 35.50295857988166,
"grad_norm": 1.8046875,
"learning_rate": 7.281699277636572e-05,
"loss": 0.3356,
"step": 12000
},
{
"epoch": 35.50295857988166,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8597,
"eval_samples_per_second": 36.625,
"eval_steps_per_second": 2.306,
"step": 12000
},
{
"epoch": 36.3905325443787,
"eval_loss": 0.5067562460899353,
"eval_runtime": 37.126,
"eval_samples_per_second": 36.363,
"eval_steps_per_second": 2.289,
"step": 12300
},
{
"epoch": 36.98224852071006,
"grad_norm": 1.7578125,
"learning_rate": 6.512255856701177e-05,
"loss": 0.3124,
"step": 12500
},
{
"epoch": 37.27810650887574,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.7155,
"eval_samples_per_second": 36.769,
"eval_steps_per_second": 2.315,
"step": 12600
},
{
"epoch": 38.16568047337278,
"eval_loss": 0.5067562460899353,
"eval_runtime": 37.3255,
"eval_samples_per_second": 36.168,
"eval_steps_per_second": 2.277,
"step": 12900
},
{
"epoch": 38.46153846153846,
"grad_norm": 1.25,
"learning_rate": 5.765665457425102e-05,
"loss": 0.2847,
"step": 13000
},
{
"epoch": 39.053254437869825,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8115,
"eval_samples_per_second": 36.673,
"eval_steps_per_second": 2.309,
"step": 13200
},
{
"epoch": 39.94082840236686,
"grad_norm": 1.90625,
"learning_rate": 5.0468200231001286e-05,
"loss": 0.2682,
"step": 13500
},
{
"epoch": 39.94082840236686,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8329,
"eval_samples_per_second": 36.652,
"eval_steps_per_second": 2.308,
"step": 13500
},
{
"epoch": 40.828402366863905,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.9982,
"eval_samples_per_second": 36.488,
"eval_steps_per_second": 2.297,
"step": 13800
},
{
"epoch": 41.42011834319526,
"grad_norm": 0.79296875,
"learning_rate": 4.360429701490934e-05,
"loss": 0.2514,
"step": 14000
},
{
"epoch": 41.71597633136095,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.9527,
"eval_samples_per_second": 36.533,
"eval_steps_per_second": 2.3,
"step": 14100
},
{
"epoch": 42.603550295857985,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8048,
"eval_samples_per_second": 36.68,
"eval_steps_per_second": 2.309,
"step": 14400
},
{
"epoch": 42.89940828402367,
"grad_norm": 0.9921875,
"learning_rate": 3.710991982161555e-05,
"loss": 0.2398,
"step": 14500
},
{
"epoch": 43.49112426035503,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.6719,
"eval_samples_per_second": 36.813,
"eval_steps_per_second": 2.318,
"step": 14700
},
{
"epoch": 44.37869822485207,
"grad_norm": 0.65234375,
"learning_rate": 3.102762227218957e-05,
"loss": 0.2324,
"step": 15000
},
{
"epoch": 44.37869822485207,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.9077,
"eval_samples_per_second": 36.578,
"eval_steps_per_second": 2.303,
"step": 15000
},
{
"epoch": 45.26627218934911,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.9609,
"eval_samples_per_second": 36.525,
"eval_steps_per_second": 2.3,
"step": 15300
},
{
"epoch": 45.857988165680474,
"grad_norm": 0.6796875,
"learning_rate": 2.5397257885675397e-05,
"loss": 0.2276,
"step": 15500
},
{
"epoch": 46.15384615384615,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.9434,
"eval_samples_per_second": 36.542,
"eval_steps_per_second": 2.301,
"step": 15600
},
{
"epoch": 47.0414201183432,
"eval_loss": 0.5067562460899353,
"eval_runtime": 37.1485,
"eval_samples_per_second": 36.341,
"eval_steps_per_second": 2.288,
"step": 15900
},
{
"epoch": 47.337278106508876,
"grad_norm": 0.73046875,
"learning_rate": 2.025571894372794e-05,
"loss": 0.2244,
"step": 16000
},
{
"epoch": 47.928994082840234,
"eval_loss": 0.5067562460899353,
"eval_runtime": 37.0304,
"eval_samples_per_second": 36.457,
"eval_steps_per_second": 2.295,
"step": 16200
},
{
"epoch": 48.81656804733728,
"grad_norm": 0.66796875,
"learning_rate": 1.563669475839956e-05,
"loss": 0.2242,
"step": 16500
},
{
"epoch": 48.81656804733728,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8527,
"eval_samples_per_second": 36.632,
"eval_steps_per_second": 2.306,
"step": 16500
},
{
"epoch": 49.70414201183432,
"eval_loss": 0.5067562460899353,
"eval_runtime": 37.017,
"eval_samples_per_second": 36.47,
"eval_steps_per_second": 2.296,
"step": 16800
},
{
"epoch": 50.29585798816568,
"grad_norm": 0.71875,
"learning_rate": 1.1570450926997655e-05,
"loss": 0.222,
"step": 17000
},
{
"epoch": 50.59171597633136,
"eval_loss": 0.5067562460899353,
"eval_runtime": 37.0005,
"eval_samples_per_second": 36.486,
"eval_steps_per_second": 2.297,
"step": 17100
},
{
"epoch": 51.4792899408284,
"eval_loss": 0.5067562460899353,
"eval_runtime": 37.0483,
"eval_samples_per_second": 36.439,
"eval_steps_per_second": 2.294,
"step": 17400
},
{
"epoch": 51.77514792899408,
"grad_norm": 0.73046875,
"learning_rate": 8.083631020418791e-06,
"loss": 0.2219,
"step": 17500
},
{
"epoch": 52.366863905325445,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8489,
"eval_samples_per_second": 36.636,
"eval_steps_per_second": 2.307,
"step": 17700
},
{
"epoch": 53.25443786982248,
"grad_norm": 0.99609375,
"learning_rate": 5.199082004372957e-06,
"loss": 0.2215,
"step": 18000
},
{
"epoch": 53.25443786982248,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8278,
"eval_samples_per_second": 36.657,
"eval_steps_per_second": 2.308,
"step": 18000
},
{
"epoch": 54.142011834319526,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8191,
"eval_samples_per_second": 36.666,
"eval_steps_per_second": 2.309,
"step": 18300
},
{
"epoch": 54.73372781065089,
"grad_norm": 1.1328125,
"learning_rate": 2.9357045374040825e-06,
"loss": 0.2224,
"step": 18500
},
{
"epoch": 55.02958579881657,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.9293,
"eval_samples_per_second": 36.556,
"eval_steps_per_second": 2.302,
"step": 18600
},
{
"epoch": 55.917159763313606,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8842,
"eval_samples_per_second": 36.601,
"eval_steps_per_second": 2.305,
"step": 18900
},
{
"epoch": 56.21301775147929,
"grad_norm": 0.7421875,
"learning_rate": 1.30832912661093e-06,
"loss": 0.2211,
"step": 19000
},
{
"epoch": 56.80473372781065,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.681,
"eval_samples_per_second": 36.804,
"eval_steps_per_second": 2.317,
"step": 19200
},
{
"epoch": 57.69230769230769,
"grad_norm": 1.0546875,
"learning_rate": 3.2761895254306287e-07,
"loss": 0.2212,
"step": 19500
},
{
"epoch": 57.69230769230769,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.6389,
"eval_samples_per_second": 36.846,
"eval_steps_per_second": 2.32,
"step": 19500
},
{
"epoch": 58.57988165680474,
"eval_loss": 0.5067562460899353,
"eval_runtime": 36.8606,
"eval_samples_per_second": 36.624,
"eval_steps_per_second": 2.306,
"step": 19800
},
{
"epoch": 59.171597633136095,
"grad_norm": 0.78515625,
"learning_rate": 0.0,
"loss": 0.2214,
"step": 20000
}
],
"logging_steps": 500,
"max_steps": 20000,
"num_input_tokens_seen": 0,
"num_train_epochs": 60,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.745192353814282e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}