| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.3310701584222438, |
| "eval_steps": 1024, |
| "global_step": 7168, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011823934229365849, |
| "grad_norm": 1.1487267017364502, |
| "learning_rate": 0.000498046875, |
| "loss": 11.798027992248535, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.023647868458731697, |
| "grad_norm": 0.8386015295982361, |
| "learning_rate": 0.000998046875, |
| "loss": 1.7853779792785645, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03547180268809755, |
| "grad_norm": 0.7344363331794739, |
| "learning_rate": 0.000999640996023194, |
| "loss": 1.103014588356018, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "grad_norm": 1.1188315153121948, |
| "learning_rate": 0.0009985588674043958, |
| "loss": 0.9580796360969543, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "eval_bleu": 0.9192375404443994, |
| "eval_ce_loss": 0.26167990953648745, |
| "eval_cos_loss": 0.26406568816127296, |
| "eval_loss": 0.9037111119864738, |
| "eval_mse_loss": 0.6016385473617135, |
| "eval_rec_loss": 0.013986090569199833, |
| "flow/cos_sim": 0.7359343250048215, |
| "flow/improvement_ratio": 0.9760946458605326, |
| "flow/mag_ratio_mean": 0.7269674200717717, |
| "flow/mag_ratio_std": 0.1390784539316343, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "eval_bleu": 0.9192375404443994, |
| "eval_ce_loss": 0.26167990953648745, |
| "eval_cos_loss": 0.26406568816127296, |
| "eval_loss": 0.9037111119864738, |
| "eval_mse_loss": 0.6016385473617135, |
| "eval_rec_loss": 0.013986090569199833, |
| "eval_runtime": 144.0156, |
| "eval_samples_per_second": 194.375, |
| "eval_steps_per_second": 3.041, |
| "flow/cos_sim": 0.7359343250048215, |
| "flow/improvement_ratio": 0.9760946458605326, |
| "flow/mag_ratio_mean": 0.7269674200717717, |
| "flow/mag_ratio_std": 0.1390784539316343, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.05911967114682925, |
| "grad_norm": 1.0113043785095215, |
| "learning_rate": 0.0009967551747861387, |
| "loss": 0.8836896419525146, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.0709436053761951, |
| "grad_norm": 0.9680636525154114, |
| "learning_rate": 0.000994232528651847, |
| "loss": 0.8432819247245789, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.08276753960556095, |
| "grad_norm": 1.166627049446106, |
| "learning_rate": 0.0009909945800260092, |
| "loss": 0.7870283126831055, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "grad_norm": 0.7747617363929749, |
| "learning_rate": 0.0009870460151900522, |
| "loss": 0.7735522389411926, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "eval_bleu": 0.9264483676285591, |
| "eval_ce_loss": 0.22021640589690372, |
| "eval_cos_loss": 0.15934634184864557, |
| "eval_loss": 0.751849727815689, |
| "eval_mse_loss": 0.51190055103879, |
| "eval_rec_loss": 0.0037981332031229603, |
| "flow/cos_sim": 0.8406536742432477, |
| "flow/improvement_ratio": 0.9754726998337871, |
| "flow/mag_ratio_mean": 0.8395581131112085, |
| "flow/mag_ratio_std": 0.09706963221096013, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "eval_bleu": 0.9264483676285591, |
| "eval_ce_loss": 0.22021640589690372, |
| "eval_cos_loss": 0.15934634184864557, |
| "eval_loss": 0.751849727815689, |
| "eval_mse_loss": 0.51190055103879, |
| "eval_rec_loss": 0.0037981332031229603, |
| "eval_runtime": 139.3758, |
| "eval_samples_per_second": 200.845, |
| "eval_steps_per_second": 3.143, |
| "flow/cos_sim": 0.8406536742432477, |
| "flow/improvement_ratio": 0.9754726998337871, |
| "flow/mag_ratio_mean": 0.8395581131112085, |
| "flow/mag_ratio_std": 0.09706963221096013, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.10641540806429264, |
| "grad_norm": 1.3507742881774902, |
| "learning_rate": 0.0009823925488998885, |
| "loss": 0.7531520128250122, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.1182393422936585, |
| "grad_norm": 1.090326189994812, |
| "learning_rate": 0.0009770409161149525, |
| "loss": 0.7384664416313171, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.13006327652302435, |
| "grad_norm": 1.6648627519607544, |
| "learning_rate": 0.0009709988622506973, |
| "loss": 0.7159472107887268, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "grad_norm": 0.720405638217926, |
| "learning_rate": 0.000964275131968659, |
| "loss": 0.7134207487106323, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "eval_bleu": 0.9362046037171927, |
| "eval_ce_loss": 0.18669388993094638, |
| "eval_cos_loss": 0.09599785676829892, |
| "eval_loss": 0.7009708360177741, |
| "eval_mse_loss": 0.5027340843797274, |
| "eval_rec_loss": 0.0019430727923829022, |
| "flow/cos_sim": 0.9040021625555814, |
| "flow/improvement_ratio": 0.9753212502830104, |
| "flow/mag_ratio_mean": 0.906250716208323, |
| "flow/mag_ratio_std": 0.07307879087519428, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "eval_bleu": 0.9362046037171927, |
| "eval_ce_loss": 0.18669388993094638, |
| "eval_cos_loss": 0.09599785676829892, |
| "eval_loss": 0.7009708360177741, |
| "eval_mse_loss": 0.5027340843797274, |
| "eval_rec_loss": 0.0019430727923829022, |
| "eval_runtime": 139.2617, |
| "eval_samples_per_second": 201.01, |
| "eval_steps_per_second": 3.145, |
| "flow/cos_sim": 0.9040021625555814, |
| "flow/improvement_ratio": 0.9753212502830104, |
| "flow/mag_ratio_mean": 0.906250716208323, |
| "flow/mag_ratio_std": 0.07307879087519428, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.15371114498175603, |
| "grad_norm": 1.171225905418396, |
| "learning_rate": 0.0009568794565203123, |
| "loss": 0.6967981457710266, |
| "step": 3328 |
| }, |
| { |
| "epoch": 0.1655350792111219, |
| "grad_norm": 0.9184499979019165, |
| "learning_rate": 0.0009488225396630347, |
| "loss": 0.6859588623046875, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.17735901344048774, |
| "grad_norm": 1.0972322225570679, |
| "learning_rate": 0.0009401160421685646, |
| "loss": 0.68483966588974, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "grad_norm": 1.2944236993789673, |
| "learning_rate": 0.0009307725649463714, |
| "loss": 0.6722217202186584, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "eval_bleu": 0.9367529454304925, |
| "eval_ce_loss": 0.17851220300531687, |
| "eval_cos_loss": 0.0685597131088308, |
| "eval_loss": 0.6638682314522191, |
| "eval_mse_loss": 0.4771829223660029, |
| "eval_rec_loss": 0.0013171346048419428, |
| "flow/cos_sim": 0.9314403127045392, |
| "flow/improvement_ratio": 0.9747336862021929, |
| "flow/mag_ratio_mean": 0.9314602082722807, |
| "flow/mag_ratio_std": 0.05983651272068013, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "eval_bleu": 0.9367529454304925, |
| "eval_ce_loss": 0.17851220300531687, |
| "eval_cos_loss": 0.0685597131088308, |
| "eval_loss": 0.6638682314522191, |
| "eval_mse_loss": 0.4771829223660029, |
| "eval_rec_loss": 0.0013171346048419428, |
| "eval_runtime": 143.247, |
| "eval_samples_per_second": 195.418, |
| "eval_steps_per_second": 3.058, |
| "flow/cos_sim": 0.9314403127045392, |
| "flow/improvement_ratio": 0.9747336862021929, |
| "flow/mag_ratio_mean": 0.9314602082722807, |
| "flow/mag_ratio_std": 0.05983651272068013, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.20100688189921945, |
| "grad_norm": 1.0950664281845093, |
| "learning_rate": 0.0009208056308063659, |
| "loss": 0.6635431051254272, |
| "step": 4352 |
| }, |
| { |
| "epoch": 0.2128308161285853, |
| "grad_norm": 1.0510311126708984, |
| "learning_rate": 0.0009102296648873445, |
| "loss": 0.652130126953125, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.22465475035795113, |
| "grad_norm": 0.7107524275779724, |
| "learning_rate": 0.0008990599737794927, |
| "loss": 0.6548014283180237, |
| "step": 4864 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "grad_norm": 1.119279146194458, |
| "learning_rate": 0.0008873127233711644, |
| "loss": 0.644295871257782, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "eval_bleu": 0.9375087809294717, |
| "eval_ce_loss": 0.17866700632629498, |
| "eval_cos_loss": 0.05568918508379699, |
| "eval_loss": 0.6384022356304404, |
| "eval_mse_loss": 0.4531491862856634, |
| "eval_rec_loss": 0.0010171248973892112, |
| "flow/cos_sim": 0.9443108406785417, |
| "flow/improvement_ratio": 0.9754358607612245, |
| "flow/mag_ratio_mean": 0.9432625220790846, |
| "flow/mag_ratio_std": 0.0532344374550532, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "eval_bleu": 0.9375087809294717, |
| "eval_ce_loss": 0.17866700632629498, |
| "eval_cos_loss": 0.05568918508379699, |
| "eval_loss": 0.6384022356304404, |
| "eval_mse_loss": 0.4531491862856634, |
| "eval_rec_loss": 0.0010171248973892112, |
| "eval_runtime": 140.7376, |
| "eval_samples_per_second": 198.902, |
| "eval_steps_per_second": 3.112, |
| "flow/cos_sim": 0.9443108406785417, |
| "flow/improvement_ratio": 0.9754358607612245, |
| "flow/mag_ratio_mean": 0.9432625220790846, |
| "flow/mag_ratio_std": 0.0532344374550532, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.24830261881668284, |
| "grad_norm": 1.223329782485962, |
| "learning_rate": 0.0008750049154520011, |
| "loss": 0.6385497450828552, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.2601265530460487, |
| "grad_norm": 0.7951129078865051, |
| "learning_rate": 0.0008621543631062487, |
| "loss": 0.6350575089454651, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.27195048727541454, |
| "grad_norm": 0.8830247521400452, |
| "learning_rate": 0.0008487796649318904, |
| "loss": 0.6269800066947937, |
| "step": 5888 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "grad_norm": 1.0399079322814941, |
| "learning_rate": 0.0008349001781229053, |
| "loss": 0.6236906051635742, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "eval_bleu": 0.9406288888554537, |
| "eval_ce_loss": 0.16271350685439018, |
| "eval_cos_loss": 0.04946032597696128, |
| "eval_loss": 0.6158577807962078, |
| "eval_mse_loss": 0.4472781540868489, |
| "eval_rec_loss": 0.0009200886164281037, |
| "flow/cos_sim": 0.9505396935765602, |
| "flow/improvement_ratio": 0.9752184091365501, |
| "flow/mag_ratio_mean": 0.9574754819205907, |
| "flow/mag_ratio_std": 0.04891788842131014, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "eval_bleu": 0.9406288888554537, |
| "eval_ce_loss": 0.16271350685439018, |
| "eval_cos_loss": 0.04946032597696128, |
| "eval_loss": 0.6158577807962078, |
| "eval_mse_loss": 0.4472781540868489, |
| "eval_rec_loss": 0.0009200886164281037, |
| "eval_runtime": 138.7881, |
| "eval_samples_per_second": 201.696, |
| "eval_steps_per_second": 3.156, |
| "flow/cos_sim": 0.9505396935765602, |
| "flow/improvement_ratio": 0.9752184091365501, |
| "flow/mag_ratio_mean": 0.9574754819205907, |
| "flow/mag_ratio_std": 0.04891788842131014, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2955983557341462, |
| "grad_norm": 1.2804330587387085, |
| "learning_rate": 0.0008205359904536107, |
| "loss": 0.6104704737663269, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.30742228996351206, |
| "grad_norm": 1.038807988166809, |
| "learning_rate": 0.0008057078912056363, |
| "loss": 0.6035579442977905, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.3192462241928779, |
| "grad_norm": 1.1162539720535278, |
| "learning_rate": 0.0007904373410796086, |
| "loss": 0.6099694967269897, |
| "step": 6912 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "grad_norm": 0.8053554892539978, |
| "learning_rate": 0.0007747464411350876, |
| "loss": 0.6010444760322571, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "eval_bleu": 0.9403609007223014, |
| "eval_ce_loss": 0.16489822024130793, |
| "eval_cos_loss": 0.042615741474307293, |
| "eval_loss": 0.5965314044799979, |
| "eval_mse_loss": 0.4266453656839998, |
| "eval_rec_loss": 0.0007262465627901213, |
| "flow/cos_sim": 0.9573842790573155, |
| "flow/improvement_ratio": 0.9754182146564466, |
| "flow/mag_ratio_mean": 0.9583017167435389, |
| "flow/mag_ratio_std": 0.044361623012584096, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "eval_bleu": 0.9403609007223014, |
| "eval_ce_loss": 0.16489822024130793, |
| "eval_cos_loss": 0.042615741474307293, |
| "eval_loss": 0.5965314044799979, |
| "eval_mse_loss": 0.4266453656839998, |
| "eval_rec_loss": 0.0007262465627901213, |
| "eval_runtime": 141.0373, |
| "eval_samples_per_second": 198.479, |
| "eval_steps_per_second": 3.106, |
| "flow/cos_sim": 0.9573842790573155, |
| "flow/improvement_ratio": 0.9754182146564466, |
| "flow/mag_ratio_mean": 0.9583017167435389, |
| "flow/mag_ratio_std": 0.044361623012584096, |
| "step": 7168 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 21651, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|