mt_en_no_oil / trainer_state.json
entropy25's picture
Upload 12 files
591f93b verified
{
"best_metric": 0.6171663917720605,
"best_model_checkpoint": "experiments/03_final_evaluation/checkpoint-1600",
"epoch": 2.296211251435132,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 0.00025,
"loss": 4.0178,
"step": 50
},
{
"epoch": 0.11,
"learning_rate": 0.0005,
"loss": 1.5061,
"step": 100
},
{
"epoch": 0.17,
"learning_rate": 0.0004900517309988062,
"loss": 1.108,
"step": 150
},
{
"epoch": 0.23,
"learning_rate": 0.0004801034619976124,
"loss": 0.995,
"step": 200
},
{
"epoch": 0.23,
"eval_bleu": 0.5211945944679698,
"eval_chrf": 72.77015539964906,
"eval_loss": 0.8683308362960815,
"eval_runtime": 985.6393,
"eval_samples_per_second": 1.762,
"eval_steps_per_second": 0.441,
"step": 200
},
{
"epoch": 0.29,
"learning_rate": 0.00047015519299641864,
"loss": 0.9542,
"step": 250
},
{
"epoch": 0.34,
"learning_rate": 0.00046020692399522485,
"loss": 0.9054,
"step": 300
},
{
"epoch": 0.4,
"learning_rate": 0.00045025865499403106,
"loss": 0.9541,
"step": 350
},
{
"epoch": 0.46,
"learning_rate": 0.0004403103859928372,
"loss": 0.8163,
"step": 400
},
{
"epoch": 0.46,
"eval_bleu": 0.5508922324303929,
"eval_chrf": 74.77937454189892,
"eval_loss": 0.7693426012992859,
"eval_runtime": 990.5843,
"eval_samples_per_second": 1.754,
"eval_steps_per_second": 0.439,
"step": 400
},
{
"epoch": 0.52,
"learning_rate": 0.0004303621169916435,
"loss": 0.8115,
"step": 450
},
{
"epoch": 0.57,
"learning_rate": 0.0004204138479904497,
"loss": 0.8257,
"step": 500
},
{
"epoch": 0.63,
"learning_rate": 0.0004104655789892559,
"loss": 0.85,
"step": 550
},
{
"epoch": 0.69,
"learning_rate": 0.00040051730998806205,
"loss": 0.7539,
"step": 600
},
{
"epoch": 0.69,
"eval_bleu": 0.578133264686958,
"eval_chrf": 76.1535645404127,
"eval_loss": 0.7177000641822815,
"eval_runtime": 984.3259,
"eval_samples_per_second": 1.765,
"eval_steps_per_second": 0.442,
"step": 600
},
{
"epoch": 0.75,
"learning_rate": 0.0003905690409868683,
"loss": 0.7984,
"step": 650
},
{
"epoch": 0.8,
"learning_rate": 0.0003806207719856745,
"loss": 0.7743,
"step": 700
},
{
"epoch": 0.86,
"learning_rate": 0.0003706725029844807,
"loss": 0.7716,
"step": 750
},
{
"epoch": 0.92,
"learning_rate": 0.0003607242339832869,
"loss": 0.8132,
"step": 800
},
{
"epoch": 0.92,
"eval_bleu": 0.5912421384389305,
"eval_chrf": 76.74654127962616,
"eval_loss": 0.6842420101165771,
"eval_runtime": 993.9253,
"eval_samples_per_second": 1.748,
"eval_steps_per_second": 0.438,
"step": 800
},
{
"epoch": 0.98,
"learning_rate": 0.00035077596498209315,
"loss": 0.764,
"step": 850
},
{
"epoch": 1.03,
"learning_rate": 0.00034082769598089936,
"loss": 0.7135,
"step": 900
},
{
"epoch": 1.09,
"learning_rate": 0.0003310783923597294,
"loss": 0.6982,
"step": 950
},
{
"epoch": 1.15,
"learning_rate": 0.00032113012335853566,
"loss": 0.6668,
"step": 1000
},
{
"epoch": 1.15,
"eval_bleu": 0.5958955137792846,
"eval_chrf": 77.17919063742045,
"eval_loss": 0.6704440712928772,
"eval_runtime": 993.5204,
"eval_samples_per_second": 1.748,
"eval_steps_per_second": 0.438,
"step": 1000
},
{
"epoch": 1.21,
"learning_rate": 0.0003111818543573418,
"loss": 0.7079,
"step": 1050
},
{
"epoch": 1.26,
"learning_rate": 0.00030123358535614803,
"loss": 0.6886,
"step": 1100
},
{
"epoch": 1.32,
"learning_rate": 0.00029128531635495424,
"loss": 0.6601,
"step": 1150
},
{
"epoch": 1.38,
"learning_rate": 0.0002813370473537605,
"loss": 0.6751,
"step": 1200
},
{
"epoch": 1.38,
"eval_bleu": 0.602066467183117,
"eval_chrf": 77.27938324850041,
"eval_loss": 0.6549907922744751,
"eval_runtime": 989.9629,
"eval_samples_per_second": 1.755,
"eval_steps_per_second": 0.439,
"step": 1200
},
{
"epoch": 1.44,
"learning_rate": 0.00027138877835256666,
"loss": 0.7087,
"step": 1250
},
{
"epoch": 1.49,
"learning_rate": 0.00026144050935137286,
"loss": 0.6184,
"step": 1300
},
{
"epoch": 1.55,
"learning_rate": 0.0002514922403501791,
"loss": 0.6633,
"step": 1350
},
{
"epoch": 1.61,
"learning_rate": 0.00024154397134898528,
"loss": 0.6842,
"step": 1400
},
{
"epoch": 1.61,
"eval_bleu": 0.6104046387091125,
"eval_chrf": 77.9484901357151,
"eval_loss": 0.638070285320282,
"eval_runtime": 996.9874,
"eval_samples_per_second": 1.742,
"eval_steps_per_second": 0.436,
"step": 1400
},
{
"epoch": 1.66,
"learning_rate": 0.0002315957023477915,
"loss": 0.6905,
"step": 1450
},
{
"epoch": 1.72,
"learning_rate": 0.0002216474333465977,
"loss": 0.6653,
"step": 1500
},
{
"epoch": 1.78,
"learning_rate": 0.0002116991643454039,
"loss": 0.648,
"step": 1550
},
{
"epoch": 1.84,
"learning_rate": 0.00020175089534421012,
"loss": 0.7029,
"step": 1600
},
{
"epoch": 1.84,
"eval_bleu": 0.6171663917720605,
"eval_chrf": 78.49582130458744,
"eval_loss": 0.6304110288619995,
"eval_runtime": 995.702,
"eval_samples_per_second": 1.744,
"eval_steps_per_second": 0.437,
"step": 1600
},
{
"epoch": 1.89,
"learning_rate": 0.00019180262634301633,
"loss": 0.6799,
"step": 1650
},
{
"epoch": 1.95,
"learning_rate": 0.00018185435734182254,
"loss": 0.647,
"step": 1700
},
{
"epoch": 2.01,
"learning_rate": 0.00017190608834062875,
"loss": 0.6635,
"step": 1750
},
{
"epoch": 2.07,
"learning_rate": 0.00016195781933943493,
"loss": 0.5784,
"step": 1800
},
{
"epoch": 2.07,
"eval_bleu": 0.6131755529970182,
"eval_chrf": 78.30699244035627,
"eval_loss": 0.6250360608100891,
"eval_runtime": 999.8083,
"eval_samples_per_second": 1.737,
"eval_steps_per_second": 0.435,
"step": 1800
},
{
"epoch": 2.12,
"learning_rate": 0.00015200955033824117,
"loss": 0.6047,
"step": 1850
},
{
"epoch": 2.18,
"learning_rate": 0.00014206128133704735,
"loss": 0.6109,
"step": 1900
},
{
"epoch": 2.24,
"learning_rate": 0.00013211301233585358,
"loss": 0.6081,
"step": 1950
},
{
"epoch": 2.3,
"learning_rate": 0.00012216474333465977,
"loss": 0.5927,
"step": 2000
},
{
"epoch": 2.3,
"eval_bleu": 0.6113260437693911,
"eval_chrf": 78.28644733075303,
"eval_loss": 0.6183715462684631,
"eval_runtime": 997.0668,
"eval_samples_per_second": 1.742,
"eval_steps_per_second": 0.436,
"step": 2000
}
],
"max_steps": 2613,
"num_train_epochs": 3,
"total_flos": 2926203245641728.0,
"trial_name": null,
"trial_params": null
}