| { |
| "best_metric": 0.6171663917720605, |
| "best_model_checkpoint": "experiments/03_final_evaluation/checkpoint-1600", |
| "epoch": 2.296211251435132, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00025, |
| "loss": 4.0178, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0005, |
| "loss": 1.5061, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0004900517309988062, |
| "loss": 1.108, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0004801034619976124, |
| "loss": 0.995, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.23, |
| "eval_bleu": 0.5211945944679698, |
| "eval_chrf": 72.77015539964906, |
| "eval_loss": 0.8683308362960815, |
| "eval_runtime": 985.6393, |
| "eval_samples_per_second": 1.762, |
| "eval_steps_per_second": 0.441, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00047015519299641864, |
| "loss": 0.9542, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00046020692399522485, |
| "loss": 0.9054, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00045025865499403106, |
| "loss": 0.9541, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.0004403103859928372, |
| "loss": 0.8163, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.46, |
| "eval_bleu": 0.5508922324303929, |
| "eval_chrf": 74.77937454189892, |
| "eval_loss": 0.7693426012992859, |
| "eval_runtime": 990.5843, |
| "eval_samples_per_second": 1.754, |
| "eval_steps_per_second": 0.439, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.0004303621169916435, |
| "loss": 0.8115, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.0004204138479904497, |
| "loss": 0.8257, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.0004104655789892559, |
| "loss": 0.85, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00040051730998806205, |
| "loss": 0.7539, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.69, |
| "eval_bleu": 0.578133264686958, |
| "eval_chrf": 76.1535645404127, |
| "eval_loss": 0.7177000641822815, |
| "eval_runtime": 984.3259, |
| "eval_samples_per_second": 1.765, |
| "eval_steps_per_second": 0.442, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.0003905690409868683, |
| "loss": 0.7984, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.0003806207719856745, |
| "loss": 0.7743, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.0003706725029844807, |
| "loss": 0.7716, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.0003607242339832869, |
| "loss": 0.8132, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.92, |
| "eval_bleu": 0.5912421384389305, |
| "eval_chrf": 76.74654127962616, |
| "eval_loss": 0.6842420101165771, |
| "eval_runtime": 993.9253, |
| "eval_samples_per_second": 1.748, |
| "eval_steps_per_second": 0.438, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00035077596498209315, |
| "loss": 0.764, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.00034082769598089936, |
| "loss": 0.7135, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.0003310783923597294, |
| "loss": 0.6982, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00032113012335853566, |
| "loss": 0.6668, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.15, |
| "eval_bleu": 0.5958955137792846, |
| "eval_chrf": 77.17919063742045, |
| "eval_loss": 0.6704440712928772, |
| "eval_runtime": 993.5204, |
| "eval_samples_per_second": 1.748, |
| "eval_steps_per_second": 0.438, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.0003111818543573418, |
| "loss": 0.7079, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00030123358535614803, |
| "loss": 0.6886, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00029128531635495424, |
| "loss": 0.6601, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.0002813370473537605, |
| "loss": 0.6751, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.38, |
| "eval_bleu": 0.602066467183117, |
| "eval_chrf": 77.27938324850041, |
| "eval_loss": 0.6549907922744751, |
| "eval_runtime": 989.9629, |
| "eval_samples_per_second": 1.755, |
| "eval_steps_per_second": 0.439, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.00027138877835256666, |
| "loss": 0.7087, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.00026144050935137286, |
| "loss": 0.6184, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 0.0002514922403501791, |
| "loss": 0.6633, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 0.00024154397134898528, |
| "loss": 0.6842, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.61, |
| "eval_bleu": 0.6104046387091125, |
| "eval_chrf": 77.9484901357151, |
| "eval_loss": 0.638070285320282, |
| "eval_runtime": 996.9874, |
| "eval_samples_per_second": 1.742, |
| "eval_steps_per_second": 0.436, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 0.0002315957023477915, |
| "loss": 0.6905, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 0.0002216474333465977, |
| "loss": 0.6653, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 0.0002116991643454039, |
| "loss": 0.648, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 0.00020175089534421012, |
| "loss": 0.7029, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.84, |
| "eval_bleu": 0.6171663917720605, |
| "eval_chrf": 78.49582130458744, |
| "eval_loss": 0.6304110288619995, |
| "eval_runtime": 995.702, |
| "eval_samples_per_second": 1.744, |
| "eval_steps_per_second": 0.437, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 0.00019180262634301633, |
| "loss": 0.6799, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 0.00018185435734182254, |
| "loss": 0.647, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 0.00017190608834062875, |
| "loss": 0.6635, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 0.00016195781933943493, |
| "loss": 0.5784, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.07, |
| "eval_bleu": 0.6131755529970182, |
| "eval_chrf": 78.30699244035627, |
| "eval_loss": 0.6250360608100891, |
| "eval_runtime": 999.8083, |
| "eval_samples_per_second": 1.737, |
| "eval_steps_per_second": 0.435, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 0.00015200955033824117, |
| "loss": 0.6047, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 0.00014206128133704735, |
| "loss": 0.6109, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 0.00013211301233585358, |
| "loss": 0.6081, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 0.00012216474333465977, |
| "loss": 0.5927, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.3, |
| "eval_bleu": 0.6113260437693911, |
| "eval_chrf": 78.28644733075303, |
| "eval_loss": 0.6183715462684631, |
| "eval_runtime": 997.0668, |
| "eval_samples_per_second": 1.742, |
| "eval_steps_per_second": 0.436, |
| "step": 2000 |
| } |
| ], |
| "max_steps": 2613, |
| "num_train_epochs": 3, |
| "total_flos": 2926203245641728.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|