| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.287374362065919, |
| "eval_steps": 1024, |
| "global_step": 27648, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.010643494891330332, |
| "grad_norm": 0.13342437148094177, |
| "learning_rate": 0.0003330078125, |
| "loss": 2.2998437881469727, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.010643494891330332, |
| "eval_cos_loss": 0.5988449528813362, |
| "eval_loss": 1.9600126259028912, |
| "eval_mse_loss": 1.6605901420116425, |
| "flow/cos_sim": 0.4011551085859537, |
| "flow/improvement_ratio": 0.942170824855566, |
| "flow/mag_ratio_mean": 0.37856073677539825, |
| "flow/mag_ratio_std": 0.14085532305762172, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.010643494891330332, |
| "eval_cos_loss": 0.5988449528813362, |
| "eval_loss": 1.9600126259028912, |
| "eval_mse_loss": 1.6605901420116425, |
| "eval_runtime": 2.6584, |
| "eval_samples_per_second": 752.329, |
| "eval_steps_per_second": 12.037, |
| "flow/cos_sim": 0.4011551085859537, |
| "flow/improvement_ratio": 0.942170824855566, |
| "flow/mag_ratio_mean": 0.37856073677539825, |
| "flow/mag_ratio_std": 0.14085532305762172, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.021286989782660665, |
| "grad_norm": 0.25054192543029785, |
| "learning_rate": 0.0006663411458333333, |
| "loss": 1.8492329120635986, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.021286989782660665, |
| "eval_cos_loss": 0.5117531130090356, |
| "eval_loss": 1.7429817728698254, |
| "eval_mse_loss": 1.4871052131056786, |
| "flow/cos_sim": 0.4882468534633517, |
| "flow/improvement_ratio": 0.9563530795276165, |
| "flow/mag_ratio_mean": 0.47669631242752075, |
| "flow/mag_ratio_std": 0.17675806442275643, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.021286989782660665, |
| "eval_cos_loss": 0.5117531130090356, |
| "eval_loss": 1.7429817728698254, |
| "eval_mse_loss": 1.4871052131056786, |
| "eval_runtime": 2.511, |
| "eval_samples_per_second": 796.509, |
| "eval_steps_per_second": 12.744, |
| "flow/cos_sim": 0.4882468534633517, |
| "flow/improvement_ratio": 0.9563530795276165, |
| "flow/mag_ratio_mean": 0.47669631242752075, |
| "flow/mag_ratio_std": 0.17675806442275643, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.031930484673991, |
| "grad_norm": 0.30941224098205566, |
| "learning_rate": 0.0009996744791666667, |
| "loss": 1.730944037437439, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.031930484673991, |
| "eval_cos_loss": 0.4815286351367831, |
| "eval_loss": 1.6586528308689594, |
| "eval_mse_loss": 1.4178885221481323, |
| "flow/cos_sim": 0.5184714393690228, |
| "flow/improvement_ratio": 0.9605911839753389, |
| "flow/mag_ratio_mean": 0.49818364903330803, |
| "flow/mag_ratio_std": 0.1928270636126399, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.031930484673991, |
| "eval_cos_loss": 0.4815286351367831, |
| "eval_loss": 1.6586528308689594, |
| "eval_mse_loss": 1.4178885221481323, |
| "eval_runtime": 3.1033, |
| "eval_samples_per_second": 644.485, |
| "eval_steps_per_second": 10.312, |
| "flow/cos_sim": 0.5184714393690228, |
| "flow/improvement_ratio": 0.9605911839753389, |
| "flow/mag_ratio_mean": 0.49818364903330803, |
| "flow/mag_ratio_std": 0.1928270636126399, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.04257397956532133, |
| "grad_norm": 0.22964967787265778, |
| "learning_rate": 0.0009997023516784352, |
| "loss": 1.6850833892822266, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.04257397956532133, |
| "eval_cos_loss": 0.476364528760314, |
| "eval_loss": 1.6391540355980396, |
| "eval_mse_loss": 1.4009717665612698, |
| "flow/cos_sim": 0.5236354488879442, |
| "flow/improvement_ratio": 0.9618693646043539, |
| "flow/mag_ratio_mean": 0.5105963433161378, |
| "flow/mag_ratio_std": 0.20592432795092463, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.04257397956532133, |
| "eval_cos_loss": 0.476364528760314, |
| "eval_loss": 1.6391540355980396, |
| "eval_mse_loss": 1.4009717665612698, |
| "eval_runtime": 2.5129, |
| "eval_samples_per_second": 795.895, |
| "eval_steps_per_second": 12.734, |
| "flow/cos_sim": 0.5236354488879442, |
| "flow/improvement_ratio": 0.9618693646043539, |
| "flow/mag_ratio_mean": 0.5105963433161378, |
| "flow/mag_ratio_std": 0.20592432795092463, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.05321747445665166, |
| "grad_norm": 0.2645546495914459, |
| "learning_rate": 0.0009988085977910004, |
| "loss": 1.6617510318756104, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.05321747445665166, |
| "eval_cos_loss": 0.4789119102060795, |
| "eval_loss": 1.645260013639927, |
| "eval_mse_loss": 1.405804067850113, |
| "flow/cos_sim": 0.5210880534723401, |
| "flow/improvement_ratio": 0.9588682930916548, |
| "flow/mag_ratio_mean": 0.504288200289011, |
| "flow/mag_ratio_std": 0.20718340016901493, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.05321747445665166, |
| "eval_cos_loss": 0.4789119102060795, |
| "eval_loss": 1.645260013639927, |
| "eval_mse_loss": 1.405804067850113, |
| "eval_runtime": 3.0976, |
| "eval_samples_per_second": 645.654, |
| "eval_steps_per_second": 10.33, |
| "flow/cos_sim": 0.5210880534723401, |
| "flow/improvement_ratio": 0.9588682930916548, |
| "flow/mag_ratio_mean": 0.504288200289011, |
| "flow/mag_ratio_std": 0.20718340016901493, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.063860969347982, |
| "grad_norm": 0.2762889862060547, |
| "learning_rate": 0.0009973198042317873, |
| "loss": 1.645796775817871, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.063860969347982, |
| "eval_cos_loss": 0.4598818449303508, |
| "eval_loss": 1.5948525853455067, |
| "eval_mse_loss": 1.3649116680026054, |
| "flow/cos_sim": 0.5401182025671005, |
| "flow/improvement_ratio": 0.9647715575993061, |
| "flow/mag_ratio_mean": 0.5178880272433162, |
| "flow/mag_ratio_std": 0.21153279254212976, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.063860969347982, |
| "eval_cos_loss": 0.4598818449303508, |
| "eval_loss": 1.5948525853455067, |
| "eval_mse_loss": 1.3649116680026054, |
| "eval_runtime": 3.0831, |
| "eval_samples_per_second": 648.695, |
| "eval_steps_per_second": 10.379, |
| "flow/cos_sim": 0.5401182025671005, |
| "flow/improvement_ratio": 0.9647715575993061, |
| "flow/mag_ratio_mean": 0.5178880272433162, |
| "flow/mag_ratio_std": 0.21153279254212976, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.07450446423931233, |
| "grad_norm": 0.17679959535598755, |
| "learning_rate": 0.0009952377470151526, |
| "loss": 1.6353809833526611, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.07450446423931233, |
| "eval_cos_loss": 0.4634226718917489, |
| "eval_loss": 1.6022505089640617, |
| "eval_mse_loss": 1.3705391697585583, |
| "flow/cos_sim": 0.5365773290395737, |
| "flow/improvement_ratio": 0.9635819494724274, |
| "flow/mag_ratio_mean": 0.5194354858249426, |
| "flow/mag_ratio_std": 0.21515046246349812, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.07450446423931233, |
| "eval_cos_loss": 0.4634226718917489, |
| "eval_loss": 1.6022505089640617, |
| "eval_mse_loss": 1.3705391697585583, |
| "eval_runtime": 2.8419, |
| "eval_samples_per_second": 703.759, |
| "eval_steps_per_second": 11.26, |
| "flow/cos_sim": 0.5365773290395737, |
| "flow/improvement_ratio": 0.9635819494724274, |
| "flow/mag_ratio_mean": 0.5194354858249426, |
| "flow/mag_ratio_std": 0.21515046246349812, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.08514795913064266, |
| "grad_norm": 0.14975515007972717, |
| "learning_rate": 0.000992564909872628, |
| "loss": 1.6262034177780151, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.08514795913064266, |
| "eval_cos_loss": 0.45912545546889305, |
| "eval_loss": 1.5890175811946392, |
| "eval_mse_loss": 1.3594548553228378, |
| "flow/cos_sim": 0.5408745482563972, |
| "flow/improvement_ratio": 0.9590303134173155, |
| "flow/mag_ratio_mean": 0.5143361240625381, |
| "flow/mag_ratio_std": 0.21537457825616002, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.08514795913064266, |
| "eval_cos_loss": 0.45912545546889305, |
| "eval_loss": 1.5890175811946392, |
| "eval_mse_loss": 1.3594548553228378, |
| "eval_runtime": 2.9302, |
| "eval_samples_per_second": 682.537, |
| "eval_steps_per_second": 10.921, |
| "flow/cos_sim": 0.5408745482563972, |
| "flow/improvement_ratio": 0.9590303134173155, |
| "flow/mag_ratio_mean": 0.5143361240625381, |
| "flow/mag_ratio_std": 0.21537457825616002, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.09579145402197299, |
| "grad_norm": 0.19106586277484894, |
| "learning_rate": 0.000989307950724573, |
| "loss": 1.6214015483856201, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.09579145402197299, |
| "eval_cos_loss": 0.4567577252164483, |
| "eval_loss": 1.5844898335635662, |
| "eval_mse_loss": 1.356110967695713, |
| "flow/cos_sim": 0.5432424321770668, |
| "flow/improvement_ratio": 0.9650511220097542, |
| "flow/mag_ratio_mean": 0.5244949720799923, |
| "flow/mag_ratio_std": 0.21130397450178862, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.09579145402197299, |
| "eval_cos_loss": 0.4567577252164483, |
| "eval_loss": 1.5844898335635662, |
| "eval_mse_loss": 1.356110967695713, |
| "eval_runtime": 3.0508, |
| "eval_samples_per_second": 655.562, |
| "eval_steps_per_second": 10.489, |
| "flow/cos_sim": 0.5432424321770668, |
| "flow/improvement_ratio": 0.9650511220097542, |
| "flow/mag_ratio_mean": 0.5244949720799923, |
| "flow/mag_ratio_std": 0.21130397450178862, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.10643494891330332, |
| "grad_norm": 0.22245089709758759, |
| "learning_rate": 0.000985464388035817, |
| "loss": 1.6132733821868896, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.10643494891330332, |
| "eval_cos_loss": 0.4598613306879997, |
| "eval_loss": 1.589576181024313, |
| "eval_mse_loss": 1.359645515680313, |
| "flow/cos_sim": 0.5401386898010969, |
| "flow/improvement_ratio": 0.9610863700509071, |
| "flow/mag_ratio_mean": 0.5160716716200113, |
| "flow/mag_ratio_std": 0.21545762522146106, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.10643494891330332, |
| "eval_cos_loss": 0.4598613306879997, |
| "eval_loss": 1.589576181024313, |
| "eval_mse_loss": 1.359645515680313, |
| "eval_runtime": 3.1847, |
| "eval_samples_per_second": 627.993, |
| "eval_steps_per_second": 10.048, |
| "flow/cos_sim": 0.5401386898010969, |
| "flow/improvement_ratio": 0.9610863700509071, |
| "flow/mag_ratio_mean": 0.5160716716200113, |
| "flow/mag_ratio_std": 0.21545762522146106, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.11707844380463366, |
| "grad_norm": 0.1567550003528595, |
| "learning_rate": 0.0009810417042745768, |
| "loss": 1.6070518493652344, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.11707844380463366, |
| "eval_cos_loss": 0.4550258554518223, |
| "eval_loss": 1.577816877514124, |
| "eval_mse_loss": 1.350303951650858, |
| "flow/cos_sim": 0.5449741557240486, |
| "flow/improvement_ratio": 0.9648044053465128, |
| "flow/mag_ratio_mean": 0.5290831215679646, |
| "flow/mag_ratio_std": 0.21279342425987124, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.11707844380463366, |
| "eval_cos_loss": 0.4550258554518223, |
| "eval_loss": 1.577816877514124, |
| "eval_mse_loss": 1.350303951650858, |
| "eval_runtime": 2.6352, |
| "eval_samples_per_second": 758.945, |
| "eval_steps_per_second": 12.143, |
| "flow/cos_sim": 0.5449741557240486, |
| "flow/improvement_ratio": 0.9648044053465128, |
| "flow/mag_ratio_mean": 0.5290831215679646, |
| "flow/mag_ratio_std": 0.21279342425987124, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.127721938695964, |
| "grad_norm": 0.19316641986370087, |
| "learning_rate": 0.0009760451753569162, |
| "loss": 1.6028146743774414, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.127721938695964, |
| "eval_cos_loss": 0.45873888209462166, |
| "eval_loss": 1.5860362015664577, |
| "eval_mse_loss": 1.3566667586565018, |
| "flow/cos_sim": 0.5412612538784742, |
| "flow/improvement_ratio": 0.9610528890043497, |
| "flow/mag_ratio_mean": 0.5225661229342222, |
| "flow/mag_ratio_std": 0.2149493475444615, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.127721938695964, |
| "eval_cos_loss": 0.45873888209462166, |
| "eval_loss": 1.5860362015664577, |
| "eval_mse_loss": 1.3566667586565018, |
| "eval_runtime": 2.6091, |
| "eval_samples_per_second": 766.55, |
| "eval_steps_per_second": 12.265, |
| "flow/cos_sim": 0.5412612538784742, |
| "flow/improvement_ratio": 0.9610528890043497, |
| "flow/mag_ratio_mean": 0.5225661229342222, |
| "flow/mag_ratio_std": 0.2149493475444615, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.13836543358729433, |
| "grad_norm": 0.17066629230976105, |
| "learning_rate": 0.000970486470662755, |
| "loss": 1.5989067554473877, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.13836543358729433, |
| "eval_cos_loss": 0.4526587063446641, |
| "eval_loss": 1.5703520886600018, |
| "eval_mse_loss": 1.3440227322280407, |
| "flow/cos_sim": 0.547341376543045, |
| "flow/improvement_ratio": 0.9634687285870314, |
| "flow/mag_ratio_mean": 0.5251006819307804, |
| "flow/mag_ratio_std": 0.2169443154707551, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.13836543358729433, |
| "eval_cos_loss": 0.4526587063446641, |
| "eval_loss": 1.5703520886600018, |
| "eval_mse_loss": 1.3440227322280407, |
| "eval_runtime": 2.6502, |
| "eval_samples_per_second": 754.652, |
| "eval_steps_per_second": 12.074, |
| "flow/cos_sim": 0.547341376543045, |
| "flow/improvement_ratio": 0.9634687285870314, |
| "flow/mag_ratio_mean": 0.5251006819307804, |
| "flow/mag_ratio_std": 0.2169443154707551, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.14900892847862465, |
| "grad_norm": 0.19086262583732605, |
| "learning_rate": 0.0009643613549160033, |
| "loss": 1.5941526889801025, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.14900892847862465, |
| "eval_cos_loss": 0.45674111880362034, |
| "eval_loss": 1.5803881026804447, |
| "eval_mse_loss": 1.3520175516605377, |
| "flow/cos_sim": 0.5432589612901211, |
| "flow/improvement_ratio": 0.9569191709160805, |
| "flow/mag_ratio_mean": 0.5241195531561971, |
| "flow/mag_ratio_std": 0.2207528604194522, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.14900892847862465, |
| "eval_cos_loss": 0.45674111880362034, |
| "eval_loss": 1.5803881026804447, |
| "eval_mse_loss": 1.3520175516605377, |
| "eval_runtime": 2.84, |
| "eval_samples_per_second": 704.229, |
| "eval_steps_per_second": 11.268, |
| "flow/cos_sim": 0.5432589612901211, |
| "flow/improvement_ratio": 0.9569191709160805, |
| "flow/mag_ratio_mean": 0.5241195531561971, |
| "flow/mag_ratio_std": 0.2207528604194522, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.159652423369955, |
| "grad_norm": 0.20660291612148285, |
| "learning_rate": 0.0009576890825691249, |
| "loss": 1.5903245210647583, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.159652423369955, |
| "eval_cos_loss": 0.4470532648265362, |
| "eval_loss": 1.5533855073153973, |
| "eval_mse_loss": 1.329858873039484, |
| "flow/cos_sim": 0.5529466420412064, |
| "flow/improvement_ratio": 0.9680595081299543, |
| "flow/mag_ratio_mean": 0.5352848172187805, |
| "flow/mag_ratio_std": 0.22097993176430464, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.159652423369955, |
| "eval_cos_loss": 0.4470532648265362, |
| "eval_loss": 1.5533855073153973, |
| "eval_mse_loss": 1.329858873039484, |
| "eval_runtime": 2.6578, |
| "eval_samples_per_second": 752.494, |
| "eval_steps_per_second": 12.04, |
| "flow/cos_sim": 0.5529466420412064, |
| "flow/improvement_ratio": 0.9680595081299543, |
| "flow/mag_ratio_mean": 0.5352848172187805, |
| "flow/mag_ratio_std": 0.22097993176430464, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.17029591826128532, |
| "grad_norm": 0.23885692656040192, |
| "learning_rate": 0.0009504645698990064, |
| "loss": 1.589218020439148, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.17029591826128532, |
| "eval_cos_loss": 0.44447089545428753, |
| "eval_loss": 1.5484142042696476, |
| "eval_mse_loss": 1.3261787556111813, |
| "flow/cos_sim": 0.5555290877819061, |
| "flow/improvement_ratio": 0.9635521955788136, |
| "flow/mag_ratio_mean": 0.5299641713500023, |
| "flow/mag_ratio_std": 0.215805409476161, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.17029591826128532, |
| "eval_cos_loss": 0.44447089545428753, |
| "eval_loss": 1.5484142042696476, |
| "eval_mse_loss": 1.3261787556111813, |
| "eval_runtime": 2.6371, |
| "eval_samples_per_second": 758.402, |
| "eval_steps_per_second": 12.134, |
| "flow/cos_sim": 0.5555290877819061, |
| "flow/improvement_ratio": 0.9635521955788136, |
| "flow/mag_ratio_mean": 0.5299641713500023, |
| "flow/mag_ratio_std": 0.215805409476161, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.18093941315261566, |
| "grad_norm": 0.2062983363866806, |
| "learning_rate": 0.0009427105273394636, |
| "loss": 1.585401177406311, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.18093941315261566, |
| "eval_cos_loss": 0.45094432309269905, |
| "eval_loss": 1.5633347816765308, |
| "eval_mse_loss": 1.3378626182675362, |
| "flow/cos_sim": 0.5490557141602039, |
| "flow/improvement_ratio": 0.9632246606051922, |
| "flow/mag_ratio_mean": 0.5234426287934184, |
| "flow/mag_ratio_std": 0.22044725203886628, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.18093941315261566, |
| "eval_cos_loss": 0.45094432309269905, |
| "eval_loss": 1.5633347816765308, |
| "eval_mse_loss": 1.3378626182675362, |
| "eval_runtime": 2.5802, |
| "eval_samples_per_second": 775.125, |
| "eval_steps_per_second": 12.402, |
| "flow/cos_sim": 0.5490557141602039, |
| "flow/improvement_ratio": 0.9632246606051922, |
| "flow/mag_ratio_mean": 0.5234426287934184, |
| "flow/mag_ratio_std": 0.22044725203886628, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.19158290804394598, |
| "grad_norm": 0.15858766436576843, |
| "learning_rate": 0.0009344210469473947, |
| "loss": 1.5826770067214966, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.19158290804394598, |
| "eval_cos_loss": 0.44898632261902094, |
| "eval_loss": 1.5564597770571709, |
| "eval_mse_loss": 1.331966608762741, |
| "flow/cos_sim": 0.5510137844830751, |
| "flow/improvement_ratio": 0.9625816307961941, |
| "flow/mag_ratio_mean": 0.5298811597749591, |
| "flow/mag_ratio_std": 0.22253544814884663, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.19158290804394598, |
| "eval_cos_loss": 0.44898632261902094, |
| "eval_loss": 1.5564597770571709, |
| "eval_mse_loss": 1.331966608762741, |
| "eval_runtime": 2.5531, |
| "eval_samples_per_second": 783.347, |
| "eval_steps_per_second": 12.534, |
| "flow/cos_sim": 0.5510137844830751, |
| "flow/improvement_ratio": 0.9625816307961941, |
| "flow/mag_ratio_mean": 0.5298811597749591, |
| "flow/mag_ratio_std": 0.22253544814884663, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.20222640293527633, |
| "grad_norm": 0.2525703012943268, |
| "learning_rate": 0.0009256133361993658, |
| "loss": 1.5798900127410889, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.20222640293527633, |
| "eval_cos_loss": 0.45141084399074316, |
| "eval_loss": 1.567859135568142, |
| "eval_mse_loss": 1.3421537093818188, |
| "flow/cos_sim": 0.5485891196876764, |
| "flow/improvement_ratio": 0.96523248963058, |
| "flow/mag_ratio_mean": 0.5181732634082437, |
| "flow/mag_ratio_std": 0.22030179109424353, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.20222640293527633, |
| "eval_cos_loss": 0.45141084399074316, |
| "eval_loss": 1.567859135568142, |
| "eval_mse_loss": 1.3421537093818188, |
| "eval_runtime": 2.5625, |
| "eval_samples_per_second": 780.491, |
| "eval_steps_per_second": 12.488, |
| "flow/cos_sim": 0.5485891196876764, |
| "flow/improvement_ratio": 0.96523248963058, |
| "flow/mag_ratio_mean": 0.5181732634082437, |
| "flow/mag_ratio_std": 0.22030179109424353, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.21286989782660665, |
| "grad_norm": 0.18312996625900269, |
| "learning_rate": 0.0009163072432159066, |
| "loss": 1.579535961151123, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.21286989782660665, |
| "eval_cos_loss": 0.45327545143663883, |
| "eval_loss": 1.5679056644439697, |
| "eval_mse_loss": 1.3412679433822632, |
| "flow/cos_sim": 0.5467245355248451, |
| "flow/improvement_ratio": 0.9616729654371738, |
| "flow/mag_ratio_mean": 0.5273217614740133, |
| "flow/mag_ratio_std": 0.2235504975542426, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.21286989782660665, |
| "eval_cos_loss": 0.45327545143663883, |
| "eval_loss": 1.5679056644439697, |
| "eval_mse_loss": 1.3412679433822632, |
| "eval_runtime": 2.5697, |
| "eval_samples_per_second": 778.293, |
| "eval_steps_per_second": 12.453, |
| "flow/cos_sim": 0.5467245355248451, |
| "flow/improvement_ratio": 0.9616729654371738, |
| "flow/mag_ratio_mean": 0.5273217614740133, |
| "flow/mag_ratio_std": 0.2235504975542426, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.223513392717937, |
| "grad_norm": 0.21262691915035248, |
| "learning_rate": 0.0009064956775190607, |
| "loss": 1.577104926109314, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.223513392717937, |
| "eval_cos_loss": 0.4483450762927532, |
| "eval_loss": 1.553330171853304, |
| "eval_mse_loss": 1.329157643020153, |
| "flow/cos_sim": 0.5516549795866013, |
| "flow/improvement_ratio": 0.9627660047262907, |
| "flow/mag_ratio_mean": 0.5387043142691255, |
| "flow/mag_ratio_std": 0.2251730626448989, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.223513392717937, |
| "eval_cos_loss": 0.4483450762927532, |
| "eval_loss": 1.553330171853304, |
| "eval_mse_loss": 1.329157643020153, |
| "eval_runtime": 2.5071, |
| "eval_samples_per_second": 797.724, |
| "eval_steps_per_second": 12.764, |
| "flow/cos_sim": 0.5516549795866013, |
| "flow/improvement_ratio": 0.9627660047262907, |
| "flow/mag_ratio_mean": 0.5387043142691255, |
| "flow/mag_ratio_std": 0.2251730626448989, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.2341568876092673, |
| "grad_norm": 0.17988671362400055, |
| "learning_rate": 0.0008961991942494195, |
| "loss": 1.574266791343689, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.2341568876092673, |
| "eval_cos_loss": 0.44411917496472597, |
| "eval_loss": 1.543789055198431, |
| "eval_mse_loss": 1.3217294700443745, |
| "flow/cos_sim": 0.5558808352798223, |
| "flow/improvement_ratio": 0.9671246875077486, |
| "flow/mag_ratio_mean": 0.5348946927115321, |
| "flow/mag_ratio_std": 0.22368196118623018, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.2341568876092673, |
| "eval_cos_loss": 0.44411917496472597, |
| "eval_loss": 1.543789055198431, |
| "eval_mse_loss": 1.3217294700443745, |
| "eval_runtime": 2.5967, |
| "eval_samples_per_second": 770.215, |
| "eval_steps_per_second": 12.323, |
| "flow/cos_sim": 0.5558808352798223, |
| "flow/improvement_ratio": 0.9671246875077486, |
| "flow/mag_ratio_mean": 0.5348946927115321, |
| "flow/mag_ratio_std": 0.22368196118623018, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.24480038250059766, |
| "grad_norm": 0.22547593712806702, |
| "learning_rate": 0.0008854408194461756, |
| "loss": 1.5733323097229004, |
| "step": 23552 |
| }, |
| { |
| "epoch": 0.24480038250059766, |
| "eval_cos_loss": 0.44172694999724627, |
| "eval_loss": 1.5377833917737007, |
| "eval_mse_loss": 1.316919919103384, |
| "flow/cos_sim": 0.5582730043679476, |
| "flow/improvement_ratio": 0.9642701335251331, |
| "flow/mag_ratio_mean": 0.5346939843147993, |
| "flow/mag_ratio_std": 0.22327208751812577, |
| "step": 23552 |
| }, |
| { |
| "epoch": 0.24480038250059766, |
| "eval_cos_loss": 0.44172694999724627, |
| "eval_loss": 1.5377833917737007, |
| "eval_mse_loss": 1.316919919103384, |
| "eval_runtime": 3.1395, |
| "eval_samples_per_second": 637.04, |
| "eval_steps_per_second": 10.193, |
| "flow/cos_sim": 0.5582730043679476, |
| "flow/improvement_ratio": 0.9642701335251331, |
| "flow/mag_ratio_mean": 0.5346939843147993, |
| "flow/mag_ratio_std": 0.22327208751812577, |
| "step": 23552 |
| }, |
| { |
| "epoch": 0.255443877391928, |
| "grad_norm": 0.2300369143486023, |
| "learning_rate": 0.0008742123561119935, |
| "loss": 1.569944143295288, |
| "step": 24576 |
| }, |
| { |
| "epoch": 0.255443877391928, |
| "eval_cos_loss": 0.447942478582263, |
| "eval_loss": 1.553868442773819, |
| "eval_mse_loss": 1.3298972100019455, |
| "flow/cos_sim": 0.5520575055852532, |
| "flow/improvement_ratio": 0.9638102632015944, |
| "flow/mag_ratio_mean": 0.5306164929643273, |
| "flow/mag_ratio_std": 0.22182104969397187, |
| "step": 24576 |
| }, |
| { |
| "epoch": 0.255443877391928, |
| "eval_cos_loss": 0.447942478582263, |
| "eval_loss": 1.553868442773819, |
| "eval_mse_loss": 1.3298972100019455, |
| "eval_runtime": 2.5886, |
| "eval_samples_per_second": 772.612, |
| "eval_steps_per_second": 12.362, |
| "flow/cos_sim": 0.5520575055852532, |
| "flow/improvement_ratio": 0.9638102632015944, |
| "flow/mag_ratio_mean": 0.5306164929643273, |
| "flow/mag_ratio_std": 0.22182104969397187, |
| "step": 24576 |
| }, |
| { |
| "epoch": 0.26608737228325835, |
| "grad_norm": 0.2177908569574356, |
| "learning_rate": 0.0008625491011983832, |
| "loss": 1.5683772563934326, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.26608737228325835, |
| "eval_cos_loss": 0.45118876080960035, |
| "eval_loss": 1.5609249621629715, |
| "eval_mse_loss": 1.3353305757045746, |
| "flow/cos_sim": 0.5488111022859812, |
| "flow/improvement_ratio": 0.9652206618338823, |
| "flow/mag_ratio_mean": 0.5250881398096681, |
| "flow/mag_ratio_std": 0.22340481635183096, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.26608737228325835, |
| "eval_cos_loss": 0.45118876080960035, |
| "eval_loss": 1.5609249621629715, |
| "eval_mse_loss": 1.3353305757045746, |
| "eval_runtime": 2.5832, |
| "eval_samples_per_second": 774.241, |
| "eval_steps_per_second": 12.388, |
| "flow/cos_sim": 0.5488111022859812, |
| "flow/improvement_ratio": 0.9652206618338823, |
| "flow/mag_ratio_mean": 0.5250881398096681, |
| "flow/mag_ratio_std": 0.22340481635183096, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.27673086717458867, |
| "grad_norm": 0.13252000510692596, |
| "learning_rate": 0.0008504421682637403, |
| "loss": 1.5673582553863525, |
| "step": 26624 |
| }, |
| { |
| "epoch": 0.27673086717458867, |
| "eval_cos_loss": 0.44563145097345114, |
| "eval_loss": 1.5506689585745335, |
| "eval_mse_loss": 1.3278532326221466, |
| "flow/cos_sim": 0.5543686226010323, |
| "flow/improvement_ratio": 0.9666622839868069, |
| "flow/mag_ratio_mean": 0.5269411941990256, |
| "flow/mag_ratio_std": 0.21877468656748533, |
| "step": 26624 |
| }, |
| { |
| "epoch": 0.27673086717458867, |
| "eval_cos_loss": 0.44563145097345114, |
| "eval_loss": 1.5506689585745335, |
| "eval_mse_loss": 1.3278532326221466, |
| "eval_runtime": 2.617, |
| "eval_samples_per_second": 764.244, |
| "eval_steps_per_second": 12.228, |
| "flow/cos_sim": 0.5543686226010323, |
| "flow/improvement_ratio": 0.9666622839868069, |
| "flow/mag_ratio_mean": 0.5269411941990256, |
| "flow/mag_ratio_std": 0.21877468656748533, |
| "step": 26624 |
| }, |
| { |
| "epoch": 0.287374362065919, |
| "grad_norm": 0.2598721981048584, |
| "learning_rate": 0.0008379296157504366, |
| "loss": 1.564971923828125, |
| "step": 27648 |
| }, |
| { |
| "epoch": 0.287374362065919, |
| "eval_cos_loss": 0.4419550793245435, |
| "eval_loss": 1.5396056547760963, |
| "eval_mse_loss": 1.3186281062662601, |
| "flow/cos_sim": 0.5580449867993593, |
| "flow/improvement_ratio": 0.9673310127109289, |
| "flow/mag_ratio_mean": 0.5319117670878768, |
| "flow/mag_ratio_std": 0.22065124148502946, |
| "step": 27648 |
| }, |
| { |
| "epoch": 0.287374362065919, |
| "eval_cos_loss": 0.4419550793245435, |
| "eval_loss": 1.5396056547760963, |
| "eval_mse_loss": 1.3186281062662601, |
| "eval_runtime": 3.0156, |
| "eval_samples_per_second": 663.219, |
| "eval_steps_per_second": 10.612, |
| "flow/cos_sim": 0.5580449867993593, |
| "flow/improvement_ratio": 0.9673310127109289, |
| "flow/mag_ratio_mean": 0.5319117670878768, |
| "flow/mag_ratio_std": 0.22065124148502946, |
| "step": 27648 |
| } |
| ], |
| "logging_steps": 1024, |
| "max_steps": 96209, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|