{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.287374362065919, "eval_steps": 1024, "global_step": 27648, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010643494891330332, "grad_norm": 0.13342437148094177, "learning_rate": 0.0003330078125, "loss": 2.2998437881469727, "step": 1024 }, { "epoch": 0.010643494891330332, "eval_cos_loss": 0.5988449528813362, "eval_loss": 1.9600126259028912, "eval_mse_loss": 1.6605901420116425, "flow/cos_sim": 0.4011551085859537, "flow/improvement_ratio": 0.942170824855566, "flow/mag_ratio_mean": 0.37856073677539825, "flow/mag_ratio_std": 0.14085532305762172, "step": 1024 }, { "epoch": 0.010643494891330332, "eval_cos_loss": 0.5988449528813362, "eval_loss": 1.9600126259028912, "eval_mse_loss": 1.6605901420116425, "eval_runtime": 2.6584, "eval_samples_per_second": 752.329, "eval_steps_per_second": 12.037, "flow/cos_sim": 0.4011551085859537, "flow/improvement_ratio": 0.942170824855566, "flow/mag_ratio_mean": 0.37856073677539825, "flow/mag_ratio_std": 0.14085532305762172, "step": 1024 }, { "epoch": 0.021286989782660665, "grad_norm": 0.25054192543029785, "learning_rate": 0.0006663411458333333, "loss": 1.8492329120635986, "step": 2048 }, { "epoch": 0.021286989782660665, "eval_cos_loss": 0.5117531130090356, "eval_loss": 1.7429817728698254, "eval_mse_loss": 1.4871052131056786, "flow/cos_sim": 0.4882468534633517, "flow/improvement_ratio": 0.9563530795276165, "flow/mag_ratio_mean": 0.47669631242752075, "flow/mag_ratio_std": 0.17675806442275643, "step": 2048 }, { "epoch": 0.021286989782660665, "eval_cos_loss": 0.5117531130090356, "eval_loss": 1.7429817728698254, "eval_mse_loss": 1.4871052131056786, "eval_runtime": 2.511, "eval_samples_per_second": 796.509, "eval_steps_per_second": 12.744, "flow/cos_sim": 0.4882468534633517, "flow/improvement_ratio": 0.9563530795276165, "flow/mag_ratio_mean": 0.47669631242752075, "flow/mag_ratio_std": 0.17675806442275643, "step": 2048 }, { "epoch": 0.031930484673991, "grad_norm": 0.30941224098205566, "learning_rate": 0.0009996744791666667, "loss": 1.730944037437439, "step": 3072 }, { "epoch": 0.031930484673991, "eval_cos_loss": 0.4815286351367831, "eval_loss": 1.6586528308689594, "eval_mse_loss": 1.4178885221481323, "flow/cos_sim": 0.5184714393690228, "flow/improvement_ratio": 0.9605911839753389, "flow/mag_ratio_mean": 0.49818364903330803, "flow/mag_ratio_std": 0.1928270636126399, "step": 3072 }, { "epoch": 0.031930484673991, "eval_cos_loss": 0.4815286351367831, "eval_loss": 1.6586528308689594, "eval_mse_loss": 1.4178885221481323, "eval_runtime": 3.1033, "eval_samples_per_second": 644.485, "eval_steps_per_second": 10.312, "flow/cos_sim": 0.5184714393690228, "flow/improvement_ratio": 0.9605911839753389, "flow/mag_ratio_mean": 0.49818364903330803, "flow/mag_ratio_std": 0.1928270636126399, "step": 3072 }, { "epoch": 0.04257397956532133, "grad_norm": 0.22964967787265778, "learning_rate": 0.0009997023516784352, "loss": 1.6850833892822266, "step": 4096 }, { "epoch": 0.04257397956532133, "eval_cos_loss": 0.476364528760314, "eval_loss": 1.6391540355980396, "eval_mse_loss": 1.4009717665612698, "flow/cos_sim": 0.5236354488879442, "flow/improvement_ratio": 0.9618693646043539, "flow/mag_ratio_mean": 0.5105963433161378, "flow/mag_ratio_std": 0.20592432795092463, "step": 4096 }, { "epoch": 0.04257397956532133, "eval_cos_loss": 0.476364528760314, "eval_loss": 1.6391540355980396, "eval_mse_loss": 1.4009717665612698, "eval_runtime": 2.5129, "eval_samples_per_second": 795.895, "eval_steps_per_second": 12.734, "flow/cos_sim": 0.5236354488879442, "flow/improvement_ratio": 0.9618693646043539, "flow/mag_ratio_mean": 0.5105963433161378, "flow/mag_ratio_std": 0.20592432795092463, "step": 4096 }, { "epoch": 0.05321747445665166, "grad_norm": 0.2645546495914459, "learning_rate": 0.0009988085977910004, "loss": 1.6617510318756104, "step": 5120 }, { "epoch": 0.05321747445665166, "eval_cos_loss": 0.4789119102060795, "eval_loss": 1.645260013639927, "eval_mse_loss": 1.405804067850113, "flow/cos_sim": 0.5210880534723401, "flow/improvement_ratio": 0.9588682930916548, "flow/mag_ratio_mean": 0.504288200289011, "flow/mag_ratio_std": 0.20718340016901493, "step": 5120 }, { "epoch": 0.05321747445665166, "eval_cos_loss": 0.4789119102060795, "eval_loss": 1.645260013639927, "eval_mse_loss": 1.405804067850113, "eval_runtime": 3.0976, "eval_samples_per_second": 645.654, "eval_steps_per_second": 10.33, "flow/cos_sim": 0.5210880534723401, "flow/improvement_ratio": 0.9588682930916548, "flow/mag_ratio_mean": 0.504288200289011, "flow/mag_ratio_std": 0.20718340016901493, "step": 5120 }, { "epoch": 0.063860969347982, "grad_norm": 0.2762889862060547, "learning_rate": 0.0009973198042317873, "loss": 1.645796775817871, "step": 6144 }, { "epoch": 0.063860969347982, "eval_cos_loss": 0.4598818449303508, "eval_loss": 1.5948525853455067, "eval_mse_loss": 1.3649116680026054, "flow/cos_sim": 0.5401182025671005, "flow/improvement_ratio": 0.9647715575993061, "flow/mag_ratio_mean": 0.5178880272433162, "flow/mag_ratio_std": 0.21153279254212976, "step": 6144 }, { "epoch": 0.063860969347982, "eval_cos_loss": 0.4598818449303508, "eval_loss": 1.5948525853455067, "eval_mse_loss": 1.3649116680026054, "eval_runtime": 3.0831, "eval_samples_per_second": 648.695, "eval_steps_per_second": 10.379, "flow/cos_sim": 0.5401182025671005, "flow/improvement_ratio": 0.9647715575993061, "flow/mag_ratio_mean": 0.5178880272433162, "flow/mag_ratio_std": 0.21153279254212976, "step": 6144 }, { "epoch": 0.07450446423931233, "grad_norm": 0.17679959535598755, "learning_rate": 0.0009952377470151526, "loss": 1.6353809833526611, "step": 7168 }, { "epoch": 0.07450446423931233, "eval_cos_loss": 0.4634226718917489, "eval_loss": 1.6022505089640617, "eval_mse_loss": 1.3705391697585583, "flow/cos_sim": 0.5365773290395737, "flow/improvement_ratio": 0.9635819494724274, "flow/mag_ratio_mean": 0.5194354858249426, "flow/mag_ratio_std": 0.21515046246349812, "step": 7168 }, { "epoch": 0.07450446423931233, "eval_cos_loss": 0.4634226718917489, "eval_loss": 1.6022505089640617, "eval_mse_loss": 1.3705391697585583, "eval_runtime": 2.8419, "eval_samples_per_second": 703.759, "eval_steps_per_second": 11.26, "flow/cos_sim": 0.5365773290395737, "flow/improvement_ratio": 0.9635819494724274, "flow/mag_ratio_mean": 0.5194354858249426, "flow/mag_ratio_std": 0.21515046246349812, "step": 7168 }, { "epoch": 0.08514795913064266, "grad_norm": 0.14975515007972717, "learning_rate": 0.000992564909872628, "loss": 1.6262034177780151, "step": 8192 }, { "epoch": 0.08514795913064266, "eval_cos_loss": 0.45912545546889305, "eval_loss": 1.5890175811946392, "eval_mse_loss": 1.3594548553228378, "flow/cos_sim": 0.5408745482563972, "flow/improvement_ratio": 0.9590303134173155, "flow/mag_ratio_mean": 0.5143361240625381, "flow/mag_ratio_std": 0.21537457825616002, "step": 8192 }, { "epoch": 0.08514795913064266, "eval_cos_loss": 0.45912545546889305, "eval_loss": 1.5890175811946392, "eval_mse_loss": 1.3594548553228378, "eval_runtime": 2.9302, "eval_samples_per_second": 682.537, "eval_steps_per_second": 10.921, "flow/cos_sim": 0.5408745482563972, "flow/improvement_ratio": 0.9590303134173155, "flow/mag_ratio_mean": 0.5143361240625381, "flow/mag_ratio_std": 0.21537457825616002, "step": 8192 }, { "epoch": 0.09579145402197299, "grad_norm": 0.19106586277484894, "learning_rate": 0.000989307950724573, "loss": 1.6214015483856201, "step": 9216 }, { "epoch": 0.09579145402197299, "eval_cos_loss": 0.4567577252164483, "eval_loss": 1.5844898335635662, "eval_mse_loss": 1.356110967695713, "flow/cos_sim": 0.5432424321770668, "flow/improvement_ratio": 0.9650511220097542, "flow/mag_ratio_mean": 0.5244949720799923, "flow/mag_ratio_std": 0.21130397450178862, "step": 9216 }, { "epoch": 0.09579145402197299, "eval_cos_loss": 0.4567577252164483, "eval_loss": 1.5844898335635662, "eval_mse_loss": 1.356110967695713, "eval_runtime": 3.0508, "eval_samples_per_second": 655.562, "eval_steps_per_second": 10.489, "flow/cos_sim": 0.5432424321770668, "flow/improvement_ratio": 0.9650511220097542, "flow/mag_ratio_mean": 0.5244949720799923, "flow/mag_ratio_std": 0.21130397450178862, "step": 9216 }, { "epoch": 0.10643494891330332, "grad_norm": 0.22245089709758759, "learning_rate": 0.000985464388035817, "loss": 1.6132733821868896, "step": 10240 }, { "epoch": 0.10643494891330332, "eval_cos_loss": 0.4598613306879997, "eval_loss": 1.589576181024313, "eval_mse_loss": 1.359645515680313, "flow/cos_sim": 0.5401386898010969, "flow/improvement_ratio": 0.9610863700509071, "flow/mag_ratio_mean": 0.5160716716200113, "flow/mag_ratio_std": 0.21545762522146106, "step": 10240 }, { "epoch": 0.10643494891330332, "eval_cos_loss": 0.4598613306879997, "eval_loss": 1.589576181024313, "eval_mse_loss": 1.359645515680313, "eval_runtime": 3.1847, "eval_samples_per_second": 627.993, "eval_steps_per_second": 10.048, "flow/cos_sim": 0.5401386898010969, "flow/improvement_ratio": 0.9610863700509071, "flow/mag_ratio_mean": 0.5160716716200113, "flow/mag_ratio_std": 0.21545762522146106, "step": 10240 }, { "epoch": 0.11707844380463366, "grad_norm": 0.1567550003528595, "learning_rate": 0.0009810417042745768, "loss": 1.6070518493652344, "step": 11264 }, { "epoch": 0.11707844380463366, "eval_cos_loss": 0.4550258554518223, "eval_loss": 1.577816877514124, "eval_mse_loss": 1.350303951650858, "flow/cos_sim": 0.5449741557240486, "flow/improvement_ratio": 0.9648044053465128, "flow/mag_ratio_mean": 0.5290831215679646, "flow/mag_ratio_std": 0.21279342425987124, "step": 11264 }, { "epoch": 0.11707844380463366, "eval_cos_loss": 0.4550258554518223, "eval_loss": 1.577816877514124, "eval_mse_loss": 1.350303951650858, "eval_runtime": 2.6352, "eval_samples_per_second": 758.945, "eval_steps_per_second": 12.143, "flow/cos_sim": 0.5449741557240486, "flow/improvement_ratio": 0.9648044053465128, "flow/mag_ratio_mean": 0.5290831215679646, "flow/mag_ratio_std": 0.21279342425987124, "step": 11264 }, { "epoch": 0.127721938695964, "grad_norm": 0.19316641986370087, "learning_rate": 0.0009760451753569162, "loss": 1.6028146743774414, "step": 12288 }, { "epoch": 0.127721938695964, "eval_cos_loss": 0.45873888209462166, "eval_loss": 1.5860362015664577, "eval_mse_loss": 1.3566667586565018, "flow/cos_sim": 0.5412612538784742, "flow/improvement_ratio": 0.9610528890043497, "flow/mag_ratio_mean": 0.5225661229342222, "flow/mag_ratio_std": 0.2149493475444615, "step": 12288 }, { "epoch": 0.127721938695964, "eval_cos_loss": 0.45873888209462166, "eval_loss": 1.5860362015664577, "eval_mse_loss": 1.3566667586565018, "eval_runtime": 2.6091, "eval_samples_per_second": 766.55, "eval_steps_per_second": 12.265, "flow/cos_sim": 0.5412612538784742, "flow/improvement_ratio": 0.9610528890043497, "flow/mag_ratio_mean": 0.5225661229342222, "flow/mag_ratio_std": 0.2149493475444615, "step": 12288 }, { "epoch": 0.13836543358729433, "grad_norm": 0.17066629230976105, "learning_rate": 0.000970486470662755, "loss": 1.5989067554473877, "step": 13312 }, { "epoch": 0.13836543358729433, "eval_cos_loss": 0.4526587063446641, "eval_loss": 1.5703520886600018, "eval_mse_loss": 1.3440227322280407, "flow/cos_sim": 0.547341376543045, "flow/improvement_ratio": 0.9634687285870314, "flow/mag_ratio_mean": 0.5251006819307804, "flow/mag_ratio_std": 0.2169443154707551, "step": 13312 }, { "epoch": 0.13836543358729433, "eval_cos_loss": 0.4526587063446641, "eval_loss": 1.5703520886600018, "eval_mse_loss": 1.3440227322280407, "eval_runtime": 2.6502, "eval_samples_per_second": 754.652, "eval_steps_per_second": 12.074, "flow/cos_sim": 0.547341376543045, "flow/improvement_ratio": 0.9634687285870314, "flow/mag_ratio_mean": 0.5251006819307804, "flow/mag_ratio_std": 0.2169443154707551, "step": 13312 }, { "epoch": 0.14900892847862465, "grad_norm": 0.19086262583732605, "learning_rate": 0.0009643613549160033, "loss": 1.5941526889801025, "step": 14336 }, { "epoch": 0.14900892847862465, "eval_cos_loss": 0.45674111880362034, "eval_loss": 1.5803881026804447, "eval_mse_loss": 1.3520175516605377, "flow/cos_sim": 0.5432589612901211, "flow/improvement_ratio": 0.9569191709160805, "flow/mag_ratio_mean": 0.5241195531561971, "flow/mag_ratio_std": 0.2207528604194522, "step": 14336 }, { "epoch": 0.14900892847862465, "eval_cos_loss": 0.45674111880362034, "eval_loss": 1.5803881026804447, "eval_mse_loss": 1.3520175516605377, "eval_runtime": 2.84, "eval_samples_per_second": 704.229, "eval_steps_per_second": 11.268, "flow/cos_sim": 0.5432589612901211, "flow/improvement_ratio": 0.9569191709160805, "flow/mag_ratio_mean": 0.5241195531561971, "flow/mag_ratio_std": 0.2207528604194522, "step": 14336 }, { "epoch": 0.159652423369955, "grad_norm": 0.20660291612148285, "learning_rate": 0.0009576890825691249, "loss": 1.5903245210647583, "step": 15360 }, { "epoch": 0.159652423369955, "eval_cos_loss": 0.4470532648265362, "eval_loss": 1.5533855073153973, "eval_mse_loss": 1.329858873039484, "flow/cos_sim": 0.5529466420412064, "flow/improvement_ratio": 0.9680595081299543, "flow/mag_ratio_mean": 0.5352848172187805, "flow/mag_ratio_std": 0.22097993176430464, "step": 15360 }, { "epoch": 0.159652423369955, "eval_cos_loss": 0.4470532648265362, "eval_loss": 1.5533855073153973, "eval_mse_loss": 1.329858873039484, "eval_runtime": 2.6578, "eval_samples_per_second": 752.494, "eval_steps_per_second": 12.04, "flow/cos_sim": 0.5529466420412064, "flow/improvement_ratio": 0.9680595081299543, "flow/mag_ratio_mean": 0.5352848172187805, "flow/mag_ratio_std": 0.22097993176430464, "step": 15360 }, { "epoch": 0.17029591826128532, "grad_norm": 0.23885692656040192, "learning_rate": 0.0009504645698990064, "loss": 1.589218020439148, "step": 16384 }, { "epoch": 0.17029591826128532, "eval_cos_loss": 0.44447089545428753, "eval_loss": 1.5484142042696476, "eval_mse_loss": 1.3261787556111813, "flow/cos_sim": 0.5555290877819061, "flow/improvement_ratio": 0.9635521955788136, "flow/mag_ratio_mean": 0.5299641713500023, "flow/mag_ratio_std": 0.215805409476161, "step": 16384 }, { "epoch": 0.17029591826128532, "eval_cos_loss": 0.44447089545428753, "eval_loss": 1.5484142042696476, "eval_mse_loss": 1.3261787556111813, "eval_runtime": 2.6371, "eval_samples_per_second": 758.402, "eval_steps_per_second": 12.134, "flow/cos_sim": 0.5555290877819061, "flow/improvement_ratio": 0.9635521955788136, "flow/mag_ratio_mean": 0.5299641713500023, "flow/mag_ratio_std": 0.215805409476161, "step": 16384 }, { "epoch": 0.18093941315261566, "grad_norm": 0.2062983363866806, "learning_rate": 0.0009427105273394636, "loss": 1.585401177406311, "step": 17408 }, { "epoch": 0.18093941315261566, "eval_cos_loss": 0.45094432309269905, "eval_loss": 1.5633347816765308, "eval_mse_loss": 1.3378626182675362, "flow/cos_sim": 0.5490557141602039, "flow/improvement_ratio": 0.9632246606051922, "flow/mag_ratio_mean": 0.5234426287934184, "flow/mag_ratio_std": 0.22044725203886628, "step": 17408 }, { "epoch": 0.18093941315261566, "eval_cos_loss": 0.45094432309269905, "eval_loss": 1.5633347816765308, "eval_mse_loss": 1.3378626182675362, "eval_runtime": 2.5802, "eval_samples_per_second": 775.125, "eval_steps_per_second": 12.402, "flow/cos_sim": 0.5490557141602039, "flow/improvement_ratio": 0.9632246606051922, "flow/mag_ratio_mean": 0.5234426287934184, "flow/mag_ratio_std": 0.22044725203886628, "step": 17408 }, { "epoch": 0.19158290804394598, "grad_norm": 0.15858766436576843, "learning_rate": 0.0009344210469473947, "loss": 1.5826770067214966, "step": 18432 }, { "epoch": 0.19158290804394598, "eval_cos_loss": 0.44898632261902094, "eval_loss": 1.5564597770571709, "eval_mse_loss": 1.331966608762741, "flow/cos_sim": 0.5510137844830751, "flow/improvement_ratio": 0.9625816307961941, "flow/mag_ratio_mean": 0.5298811597749591, "flow/mag_ratio_std": 0.22253544814884663, "step": 18432 }, { "epoch": 0.19158290804394598, "eval_cos_loss": 0.44898632261902094, "eval_loss": 1.5564597770571709, "eval_mse_loss": 1.331966608762741, "eval_runtime": 2.5531, "eval_samples_per_second": 783.347, "eval_steps_per_second": 12.534, "flow/cos_sim": 0.5510137844830751, "flow/improvement_ratio": 0.9625816307961941, "flow/mag_ratio_mean": 0.5298811597749591, "flow/mag_ratio_std": 0.22253544814884663, "step": 18432 }, { "epoch": 0.20222640293527633, "grad_norm": 0.2525703012943268, "learning_rate": 0.0009256133361993658, "loss": 1.5798900127410889, "step": 19456 }, { "epoch": 0.20222640293527633, "eval_cos_loss": 0.45141084399074316, "eval_loss": 1.567859135568142, "eval_mse_loss": 1.3421537093818188, "flow/cos_sim": 0.5485891196876764, "flow/improvement_ratio": 0.96523248963058, "flow/mag_ratio_mean": 0.5181732634082437, "flow/mag_ratio_std": 0.22030179109424353, "step": 19456 }, { "epoch": 0.20222640293527633, "eval_cos_loss": 0.45141084399074316, "eval_loss": 1.567859135568142, "eval_mse_loss": 1.3421537093818188, "eval_runtime": 2.5625, "eval_samples_per_second": 780.491, "eval_steps_per_second": 12.488, "flow/cos_sim": 0.5485891196876764, "flow/improvement_ratio": 0.96523248963058, "flow/mag_ratio_mean": 0.5181732634082437, "flow/mag_ratio_std": 0.22030179109424353, "step": 19456 }, { "epoch": 0.21286989782660665, "grad_norm": 0.18312996625900269, "learning_rate": 0.0009163072432159066, "loss": 1.579535961151123, "step": 20480 }, { "epoch": 0.21286989782660665, "eval_cos_loss": 0.45327545143663883, "eval_loss": 1.5679056644439697, "eval_mse_loss": 1.3412679433822632, "flow/cos_sim": 0.5467245355248451, "flow/improvement_ratio": 0.9616729654371738, "flow/mag_ratio_mean": 0.5273217614740133, "flow/mag_ratio_std": 0.2235504975542426, "step": 20480 }, { "epoch": 0.21286989782660665, "eval_cos_loss": 0.45327545143663883, "eval_loss": 1.5679056644439697, "eval_mse_loss": 1.3412679433822632, "eval_runtime": 2.5697, "eval_samples_per_second": 778.293, "eval_steps_per_second": 12.453, "flow/cos_sim": 0.5467245355248451, "flow/improvement_ratio": 0.9616729654371738, "flow/mag_ratio_mean": 0.5273217614740133, "flow/mag_ratio_std": 0.2235504975542426, "step": 20480 }, { "epoch": 0.223513392717937, "grad_norm": 0.21262691915035248, "learning_rate": 0.0009064956775190607, "loss": 1.577104926109314, "step": 21504 }, { "epoch": 0.223513392717937, "eval_cos_loss": 0.4483450762927532, "eval_loss": 1.553330171853304, "eval_mse_loss": 1.329157643020153, "flow/cos_sim": 0.5516549795866013, "flow/improvement_ratio": 0.9627660047262907, "flow/mag_ratio_mean": 0.5387043142691255, "flow/mag_ratio_std": 0.2251730626448989, "step": 21504 }, { "epoch": 0.223513392717937, "eval_cos_loss": 0.4483450762927532, "eval_loss": 1.553330171853304, "eval_mse_loss": 1.329157643020153, "eval_runtime": 2.5071, "eval_samples_per_second": 797.724, "eval_steps_per_second": 12.764, "flow/cos_sim": 0.5516549795866013, "flow/improvement_ratio": 0.9627660047262907, "flow/mag_ratio_mean": 0.5387043142691255, "flow/mag_ratio_std": 0.2251730626448989, "step": 21504 }, { "epoch": 0.2341568876092673, "grad_norm": 0.17988671362400055, "learning_rate": 0.0008961991942494195, "loss": 1.574266791343689, "step": 22528 }, { "epoch": 0.2341568876092673, "eval_cos_loss": 0.44411917496472597, "eval_loss": 1.543789055198431, "eval_mse_loss": 1.3217294700443745, "flow/cos_sim": 0.5558808352798223, "flow/improvement_ratio": 0.9671246875077486, "flow/mag_ratio_mean": 0.5348946927115321, "flow/mag_ratio_std": 0.22368196118623018, "step": 22528 }, { "epoch": 0.2341568876092673, "eval_cos_loss": 0.44411917496472597, "eval_loss": 1.543789055198431, "eval_mse_loss": 1.3217294700443745, "eval_runtime": 2.5967, "eval_samples_per_second": 770.215, "eval_steps_per_second": 12.323, "flow/cos_sim": 0.5558808352798223, "flow/improvement_ratio": 0.9671246875077486, "flow/mag_ratio_mean": 0.5348946927115321, "flow/mag_ratio_std": 0.22368196118623018, "step": 22528 }, { "epoch": 0.24480038250059766, "grad_norm": 0.22547593712806702, "learning_rate": 0.0008854408194461756, "loss": 1.5733323097229004, "step": 23552 }, { "epoch": 0.24480038250059766, "eval_cos_loss": 0.44172694999724627, "eval_loss": 1.5377833917737007, "eval_mse_loss": 1.316919919103384, "flow/cos_sim": 0.5582730043679476, "flow/improvement_ratio": 0.9642701335251331, "flow/mag_ratio_mean": 0.5346939843147993, "flow/mag_ratio_std": 0.22327208751812577, "step": 23552 }, { "epoch": 0.24480038250059766, "eval_cos_loss": 0.44172694999724627, "eval_loss": 1.5377833917737007, "eval_mse_loss": 1.316919919103384, "eval_runtime": 3.1395, "eval_samples_per_second": 637.04, "eval_steps_per_second": 10.193, "flow/cos_sim": 0.5582730043679476, "flow/improvement_ratio": 0.9642701335251331, "flow/mag_ratio_mean": 0.5346939843147993, "flow/mag_ratio_std": 0.22327208751812577, "step": 23552 }, { "epoch": 0.255443877391928, "grad_norm": 0.2300369143486023, "learning_rate": 0.0008742123561119935, "loss": 1.569944143295288, "step": 24576 }, { "epoch": 0.255443877391928, "eval_cos_loss": 0.447942478582263, "eval_loss": 1.553868442773819, "eval_mse_loss": 1.3298972100019455, "flow/cos_sim": 0.5520575055852532, "flow/improvement_ratio": 0.9638102632015944, "flow/mag_ratio_mean": 0.5306164929643273, "flow/mag_ratio_std": 0.22182104969397187, "step": 24576 }, { "epoch": 0.255443877391928, "eval_cos_loss": 0.447942478582263, "eval_loss": 1.553868442773819, "eval_mse_loss": 1.3298972100019455, "eval_runtime": 2.5886, "eval_samples_per_second": 772.612, "eval_steps_per_second": 12.362, "flow/cos_sim": 0.5520575055852532, "flow/improvement_ratio": 0.9638102632015944, "flow/mag_ratio_mean": 0.5306164929643273, "flow/mag_ratio_std": 0.22182104969397187, "step": 24576 }, { "epoch": 0.26608737228325835, "grad_norm": 0.2177908569574356, "learning_rate": 0.0008625491011983832, "loss": 1.5683772563934326, "step": 25600 }, { "epoch": 0.26608737228325835, "eval_cos_loss": 0.45118876080960035, "eval_loss": 1.5609249621629715, "eval_mse_loss": 1.3353305757045746, "flow/cos_sim": 0.5488111022859812, "flow/improvement_ratio": 0.9652206618338823, "flow/mag_ratio_mean": 0.5250881398096681, "flow/mag_ratio_std": 0.22340481635183096, "step": 25600 }, { "epoch": 0.26608737228325835, "eval_cos_loss": 0.45118876080960035, "eval_loss": 1.5609249621629715, "eval_mse_loss": 1.3353305757045746, "eval_runtime": 2.5832, "eval_samples_per_second": 774.241, "eval_steps_per_second": 12.388, "flow/cos_sim": 0.5488111022859812, "flow/improvement_ratio": 0.9652206618338823, "flow/mag_ratio_mean": 0.5250881398096681, "flow/mag_ratio_std": 0.22340481635183096, "step": 25600 }, { "epoch": 0.27673086717458867, "grad_norm": 0.13252000510692596, "learning_rate": 0.0008504421682637403, "loss": 1.5673582553863525, "step": 26624 }, { "epoch": 0.27673086717458867, "eval_cos_loss": 0.44563145097345114, "eval_loss": 1.5506689585745335, "eval_mse_loss": 1.3278532326221466, "flow/cos_sim": 0.5543686226010323, "flow/improvement_ratio": 0.9666622839868069, "flow/mag_ratio_mean": 0.5269411941990256, "flow/mag_ratio_std": 0.21877468656748533, "step": 26624 }, { "epoch": 0.27673086717458867, "eval_cos_loss": 0.44563145097345114, "eval_loss": 1.5506689585745335, "eval_mse_loss": 1.3278532326221466, "eval_runtime": 2.617, "eval_samples_per_second": 764.244, "eval_steps_per_second": 12.228, "flow/cos_sim": 0.5543686226010323, "flow/improvement_ratio": 0.9666622839868069, "flow/mag_ratio_mean": 0.5269411941990256, "flow/mag_ratio_std": 0.21877468656748533, "step": 26624 }, { "epoch": 0.287374362065919, "grad_norm": 0.2598721981048584, "learning_rate": 0.0008379296157504366, "loss": 1.564971923828125, "step": 27648 }, { "epoch": 0.287374362065919, "eval_cos_loss": 0.4419550793245435, "eval_loss": 1.5396056547760963, "eval_mse_loss": 1.3186281062662601, "flow/cos_sim": 0.5580449867993593, "flow/improvement_ratio": 0.9673310127109289, "flow/mag_ratio_mean": 0.5319117670878768, "flow/mag_ratio_std": 0.22065124148502946, "step": 27648 }, { "epoch": 0.287374362065919, "eval_cos_loss": 0.4419550793245435, "eval_loss": 1.5396056547760963, "eval_mse_loss": 1.3186281062662601, "eval_runtime": 3.0156, "eval_samples_per_second": 663.219, "eval_steps_per_second": 10.612, "flow/cos_sim": 0.5580449867993593, "flow/improvement_ratio": 0.9673310127109289, "flow/mag_ratio_mean": 0.5319117670878768, "flow/mag_ratio_std": 0.22065124148502946, "step": 27648 } ], "logging_steps": 1024, "max_steps": 96209, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1024, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }