{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 1024, "global_step": 96209, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010643494891330332, "grad_norm": 0.13342437148094177, "learning_rate": 0.0003330078125, "loss": 2.2998437881469727, "step": 1024 }, { "epoch": 0.010643494891330332, "eval_cos_loss": 0.5988449528813362, "eval_loss": 1.9600126259028912, "eval_mse_loss": 1.6605901420116425, "flow/cos_sim": 0.4011551085859537, "flow/improvement_ratio": 0.942170824855566, "flow/mag_ratio_mean": 0.37856073677539825, "flow/mag_ratio_std": 0.14085532305762172, "step": 1024 }, { "epoch": 0.010643494891330332, "eval_cos_loss": 0.5988449528813362, "eval_loss": 1.9600126259028912, "eval_mse_loss": 1.6605901420116425, "eval_runtime": 2.6584, "eval_samples_per_second": 752.329, "eval_steps_per_second": 12.037, "flow/cos_sim": 0.4011551085859537, "flow/improvement_ratio": 0.942170824855566, "flow/mag_ratio_mean": 0.37856073677539825, "flow/mag_ratio_std": 0.14085532305762172, "step": 1024 }, { "epoch": 0.021286989782660665, "grad_norm": 0.25054192543029785, "learning_rate": 0.0006663411458333333, "loss": 1.8492329120635986, "step": 2048 }, { "epoch": 0.021286989782660665, "eval_cos_loss": 0.5117531130090356, "eval_loss": 1.7429817728698254, "eval_mse_loss": 1.4871052131056786, "flow/cos_sim": 0.4882468534633517, "flow/improvement_ratio": 0.9563530795276165, "flow/mag_ratio_mean": 0.47669631242752075, "flow/mag_ratio_std": 0.17675806442275643, "step": 2048 }, { "epoch": 0.021286989782660665, "eval_cos_loss": 0.5117531130090356, "eval_loss": 1.7429817728698254, "eval_mse_loss": 1.4871052131056786, "eval_runtime": 2.511, "eval_samples_per_second": 796.509, "eval_steps_per_second": 12.744, "flow/cos_sim": 0.4882468534633517, "flow/improvement_ratio": 0.9563530795276165, "flow/mag_ratio_mean": 0.47669631242752075, "flow/mag_ratio_std": 0.17675806442275643, "step": 2048 }, { "epoch": 0.031930484673991, "grad_norm": 0.30941224098205566, "learning_rate": 0.0009996744791666667, "loss": 1.730944037437439, "step": 3072 }, { "epoch": 0.031930484673991, "eval_cos_loss": 0.4815286351367831, "eval_loss": 1.6586528308689594, "eval_mse_loss": 1.4178885221481323, "flow/cos_sim": 0.5184714393690228, "flow/improvement_ratio": 0.9605911839753389, "flow/mag_ratio_mean": 0.49818364903330803, "flow/mag_ratio_std": 0.1928270636126399, "step": 3072 }, { "epoch": 0.031930484673991, "eval_cos_loss": 0.4815286351367831, "eval_loss": 1.6586528308689594, "eval_mse_loss": 1.4178885221481323, "eval_runtime": 3.1033, "eval_samples_per_second": 644.485, "eval_steps_per_second": 10.312, "flow/cos_sim": 0.5184714393690228, "flow/improvement_ratio": 0.9605911839753389, "flow/mag_ratio_mean": 0.49818364903330803, "flow/mag_ratio_std": 0.1928270636126399, "step": 3072 }, { "epoch": 0.04257397956532133, "grad_norm": 0.22964967787265778, "learning_rate": 0.0009997023516784352, "loss": 1.6850833892822266, "step": 4096 }, { "epoch": 0.04257397956532133, "eval_cos_loss": 0.476364528760314, "eval_loss": 1.6391540355980396, "eval_mse_loss": 1.4009717665612698, "flow/cos_sim": 0.5236354488879442, "flow/improvement_ratio": 0.9618693646043539, "flow/mag_ratio_mean": 0.5105963433161378, "flow/mag_ratio_std": 0.20592432795092463, "step": 4096 }, { "epoch": 0.04257397956532133, "eval_cos_loss": 0.476364528760314, "eval_loss": 1.6391540355980396, "eval_mse_loss": 1.4009717665612698, "eval_runtime": 2.5129, "eval_samples_per_second": 795.895, "eval_steps_per_second": 12.734, "flow/cos_sim": 0.5236354488879442, "flow/improvement_ratio": 0.9618693646043539, "flow/mag_ratio_mean": 0.5105963433161378, "flow/mag_ratio_std": 0.20592432795092463, "step": 4096 }, { "epoch": 0.05321747445665166, "grad_norm": 0.2645546495914459, "learning_rate": 0.0009988085977910004, "loss": 1.6617510318756104, "step": 5120 }, { "epoch": 0.05321747445665166, "eval_cos_loss": 0.4789119102060795, "eval_loss": 1.645260013639927, "eval_mse_loss": 1.405804067850113, "flow/cos_sim": 0.5210880534723401, "flow/improvement_ratio": 0.9588682930916548, "flow/mag_ratio_mean": 0.504288200289011, "flow/mag_ratio_std": 0.20718340016901493, "step": 5120 }, { "epoch": 0.05321747445665166, "eval_cos_loss": 0.4789119102060795, "eval_loss": 1.645260013639927, "eval_mse_loss": 1.405804067850113, "eval_runtime": 3.0976, "eval_samples_per_second": 645.654, "eval_steps_per_second": 10.33, "flow/cos_sim": 0.5210880534723401, "flow/improvement_ratio": 0.9588682930916548, "flow/mag_ratio_mean": 0.504288200289011, "flow/mag_ratio_std": 0.20718340016901493, "step": 5120 }, { "epoch": 0.063860969347982, "grad_norm": 0.2762889862060547, "learning_rate": 0.0009973198042317873, "loss": 1.645796775817871, "step": 6144 }, { "epoch": 0.063860969347982, "eval_cos_loss": 0.4598818449303508, "eval_loss": 1.5948525853455067, "eval_mse_loss": 1.3649116680026054, "flow/cos_sim": 0.5401182025671005, "flow/improvement_ratio": 0.9647715575993061, "flow/mag_ratio_mean": 0.5178880272433162, "flow/mag_ratio_std": 0.21153279254212976, "step": 6144 }, { "epoch": 0.063860969347982, "eval_cos_loss": 0.4598818449303508, "eval_loss": 1.5948525853455067, "eval_mse_loss": 1.3649116680026054, "eval_runtime": 3.0831, "eval_samples_per_second": 648.695, "eval_steps_per_second": 10.379, "flow/cos_sim": 0.5401182025671005, "flow/improvement_ratio": 0.9647715575993061, "flow/mag_ratio_mean": 0.5178880272433162, "flow/mag_ratio_std": 0.21153279254212976, "step": 6144 }, { "epoch": 0.07450446423931233, "grad_norm": 0.17679959535598755, "learning_rate": 0.0009952377470151526, "loss": 1.6353809833526611, "step": 7168 }, { "epoch": 0.07450446423931233, "eval_cos_loss": 0.4634226718917489, "eval_loss": 1.6022505089640617, "eval_mse_loss": 1.3705391697585583, "flow/cos_sim": 0.5365773290395737, "flow/improvement_ratio": 0.9635819494724274, "flow/mag_ratio_mean": 0.5194354858249426, "flow/mag_ratio_std": 0.21515046246349812, "step": 7168 }, { "epoch": 0.07450446423931233, "eval_cos_loss": 0.4634226718917489, "eval_loss": 1.6022505089640617, "eval_mse_loss": 1.3705391697585583, "eval_runtime": 2.8419, "eval_samples_per_second": 703.759, "eval_steps_per_second": 11.26, "flow/cos_sim": 0.5365773290395737, "flow/improvement_ratio": 0.9635819494724274, "flow/mag_ratio_mean": 0.5194354858249426, "flow/mag_ratio_std": 0.21515046246349812, "step": 7168 }, { "epoch": 0.08514795913064266, "grad_norm": 0.14975515007972717, "learning_rate": 0.000992564909872628, "loss": 1.6262034177780151, "step": 8192 }, { "epoch": 0.08514795913064266, "eval_cos_loss": 0.45912545546889305, "eval_loss": 1.5890175811946392, "eval_mse_loss": 1.3594548553228378, "flow/cos_sim": 0.5408745482563972, "flow/improvement_ratio": 0.9590303134173155, "flow/mag_ratio_mean": 0.5143361240625381, "flow/mag_ratio_std": 0.21537457825616002, "step": 8192 }, { "epoch": 0.08514795913064266, "eval_cos_loss": 0.45912545546889305, "eval_loss": 1.5890175811946392, "eval_mse_loss": 1.3594548553228378, "eval_runtime": 2.9302, "eval_samples_per_second": 682.537, "eval_steps_per_second": 10.921, "flow/cos_sim": 0.5408745482563972, "flow/improvement_ratio": 0.9590303134173155, "flow/mag_ratio_mean": 0.5143361240625381, "flow/mag_ratio_std": 0.21537457825616002, "step": 8192 }, { "epoch": 0.09579145402197299, "grad_norm": 0.19106586277484894, "learning_rate": 0.000989307950724573, "loss": 1.6214015483856201, "step": 9216 }, { "epoch": 0.09579145402197299, "eval_cos_loss": 0.4567577252164483, "eval_loss": 1.5844898335635662, "eval_mse_loss": 1.356110967695713, "flow/cos_sim": 0.5432424321770668, "flow/improvement_ratio": 0.9650511220097542, "flow/mag_ratio_mean": 0.5244949720799923, "flow/mag_ratio_std": 0.21130397450178862, "step": 9216 }, { "epoch": 0.09579145402197299, "eval_cos_loss": 0.4567577252164483, "eval_loss": 1.5844898335635662, "eval_mse_loss": 1.356110967695713, "eval_runtime": 3.0508, "eval_samples_per_second": 655.562, "eval_steps_per_second": 10.489, "flow/cos_sim": 0.5432424321770668, "flow/improvement_ratio": 0.9650511220097542, "flow/mag_ratio_mean": 0.5244949720799923, "flow/mag_ratio_std": 0.21130397450178862, "step": 9216 }, { "epoch": 0.10643494891330332, "grad_norm": 0.22245089709758759, "learning_rate": 0.000985464388035817, "loss": 1.6132733821868896, "step": 10240 }, { "epoch": 0.10643494891330332, "eval_cos_loss": 0.4598613306879997, "eval_loss": 1.589576181024313, "eval_mse_loss": 1.359645515680313, "flow/cos_sim": 0.5401386898010969, "flow/improvement_ratio": 0.9610863700509071, "flow/mag_ratio_mean": 0.5160716716200113, "flow/mag_ratio_std": 0.21545762522146106, "step": 10240 }, { "epoch": 0.10643494891330332, "eval_cos_loss": 0.4598613306879997, "eval_loss": 1.589576181024313, "eval_mse_loss": 1.359645515680313, "eval_runtime": 3.1847, "eval_samples_per_second": 627.993, "eval_steps_per_second": 10.048, "flow/cos_sim": 0.5401386898010969, "flow/improvement_ratio": 0.9610863700509071, "flow/mag_ratio_mean": 0.5160716716200113, "flow/mag_ratio_std": 0.21545762522146106, "step": 10240 }, { "epoch": 0.11707844380463366, "grad_norm": 0.1567550003528595, "learning_rate": 0.0009810417042745768, "loss": 1.6070518493652344, "step": 11264 }, { "epoch": 0.11707844380463366, "eval_cos_loss": 0.4550258554518223, "eval_loss": 1.577816877514124, "eval_mse_loss": 1.350303951650858, "flow/cos_sim": 0.5449741557240486, "flow/improvement_ratio": 0.9648044053465128, "flow/mag_ratio_mean": 0.5290831215679646, "flow/mag_ratio_std": 0.21279342425987124, "step": 11264 }, { "epoch": 0.11707844380463366, "eval_cos_loss": 0.4550258554518223, "eval_loss": 1.577816877514124, "eval_mse_loss": 1.350303951650858, "eval_runtime": 2.6352, "eval_samples_per_second": 758.945, "eval_steps_per_second": 12.143, "flow/cos_sim": 0.5449741557240486, "flow/improvement_ratio": 0.9648044053465128, "flow/mag_ratio_mean": 0.5290831215679646, "flow/mag_ratio_std": 0.21279342425987124, "step": 11264 }, { "epoch": 0.127721938695964, "grad_norm": 0.19316641986370087, "learning_rate": 0.0009760451753569162, "loss": 1.6028146743774414, "step": 12288 }, { "epoch": 0.127721938695964, "eval_cos_loss": 0.45873888209462166, "eval_loss": 1.5860362015664577, "eval_mse_loss": 1.3566667586565018, "flow/cos_sim": 0.5412612538784742, "flow/improvement_ratio": 0.9610528890043497, "flow/mag_ratio_mean": 0.5225661229342222, "flow/mag_ratio_std": 0.2149493475444615, "step": 12288 }, { "epoch": 0.127721938695964, "eval_cos_loss": 0.45873888209462166, "eval_loss": 1.5860362015664577, "eval_mse_loss": 1.3566667586565018, "eval_runtime": 2.6091, "eval_samples_per_second": 766.55, "eval_steps_per_second": 12.265, "flow/cos_sim": 0.5412612538784742, "flow/improvement_ratio": 0.9610528890043497, "flow/mag_ratio_mean": 0.5225661229342222, "flow/mag_ratio_std": 0.2149493475444615, "step": 12288 }, { "epoch": 0.13836543358729433, "grad_norm": 0.17066629230976105, "learning_rate": 0.000970486470662755, "loss": 1.5989067554473877, "step": 13312 }, { "epoch": 0.13836543358729433, "eval_cos_loss": 0.4526587063446641, "eval_loss": 1.5703520886600018, "eval_mse_loss": 1.3440227322280407, "flow/cos_sim": 0.547341376543045, "flow/improvement_ratio": 0.9634687285870314, "flow/mag_ratio_mean": 0.5251006819307804, "flow/mag_ratio_std": 0.2169443154707551, "step": 13312 }, { "epoch": 0.13836543358729433, "eval_cos_loss": 0.4526587063446641, "eval_loss": 1.5703520886600018, "eval_mse_loss": 1.3440227322280407, "eval_runtime": 2.6502, "eval_samples_per_second": 754.652, "eval_steps_per_second": 12.074, "flow/cos_sim": 0.547341376543045, "flow/improvement_ratio": 0.9634687285870314, "flow/mag_ratio_mean": 0.5251006819307804, "flow/mag_ratio_std": 0.2169443154707551, "step": 13312 }, { "epoch": 0.14900892847862465, "grad_norm": 0.19086262583732605, "learning_rate": 0.0009643613549160033, "loss": 1.5941526889801025, "step": 14336 }, { "epoch": 0.14900892847862465, "eval_cos_loss": 0.45674111880362034, "eval_loss": 1.5803881026804447, "eval_mse_loss": 1.3520175516605377, "flow/cos_sim": 0.5432589612901211, "flow/improvement_ratio": 0.9569191709160805, "flow/mag_ratio_mean": 0.5241195531561971, "flow/mag_ratio_std": 0.2207528604194522, "step": 14336 }, { "epoch": 0.14900892847862465, "eval_cos_loss": 0.45674111880362034, "eval_loss": 1.5803881026804447, "eval_mse_loss": 1.3520175516605377, "eval_runtime": 2.84, "eval_samples_per_second": 704.229, "eval_steps_per_second": 11.268, "flow/cos_sim": 0.5432589612901211, "flow/improvement_ratio": 0.9569191709160805, "flow/mag_ratio_mean": 0.5241195531561971, "flow/mag_ratio_std": 0.2207528604194522, "step": 14336 }, { "epoch": 0.159652423369955, "grad_norm": 0.20660291612148285, "learning_rate": 0.0009576890825691249, "loss": 1.5903245210647583, "step": 15360 }, { "epoch": 0.159652423369955, "eval_cos_loss": 0.4470532648265362, "eval_loss": 1.5533855073153973, "eval_mse_loss": 1.329858873039484, "flow/cos_sim": 0.5529466420412064, "flow/improvement_ratio": 0.9680595081299543, "flow/mag_ratio_mean": 0.5352848172187805, "flow/mag_ratio_std": 0.22097993176430464, "step": 15360 }, { "epoch": 0.159652423369955, "eval_cos_loss": 0.4470532648265362, "eval_loss": 1.5533855073153973, "eval_mse_loss": 1.329858873039484, "eval_runtime": 2.6578, "eval_samples_per_second": 752.494, "eval_steps_per_second": 12.04, "flow/cos_sim": 0.5529466420412064, "flow/improvement_ratio": 0.9680595081299543, "flow/mag_ratio_mean": 0.5352848172187805, "flow/mag_ratio_std": 0.22097993176430464, "step": 15360 }, { "epoch": 0.17029591826128532, "grad_norm": 0.23885692656040192, "learning_rate": 0.0009504645698990064, "loss": 1.589218020439148, "step": 16384 }, { "epoch": 0.17029591826128532, "eval_cos_loss": 0.44447089545428753, "eval_loss": 1.5484142042696476, "eval_mse_loss": 1.3261787556111813, "flow/cos_sim": 0.5555290877819061, "flow/improvement_ratio": 0.9635521955788136, "flow/mag_ratio_mean": 0.5299641713500023, "flow/mag_ratio_std": 0.215805409476161, "step": 16384 }, { "epoch": 0.17029591826128532, "eval_cos_loss": 0.44447089545428753, "eval_loss": 1.5484142042696476, "eval_mse_loss": 1.3261787556111813, "eval_runtime": 2.6371, "eval_samples_per_second": 758.402, "eval_steps_per_second": 12.134, "flow/cos_sim": 0.5555290877819061, "flow/improvement_ratio": 0.9635521955788136, "flow/mag_ratio_mean": 0.5299641713500023, "flow/mag_ratio_std": 0.215805409476161, "step": 16384 }, { "epoch": 0.18093941315261566, "grad_norm": 0.2062983363866806, "learning_rate": 0.0009427105273394636, "loss": 1.585401177406311, "step": 17408 }, { "epoch": 0.18093941315261566, "eval_cos_loss": 0.45094432309269905, "eval_loss": 1.5633347816765308, "eval_mse_loss": 1.3378626182675362, "flow/cos_sim": 0.5490557141602039, "flow/improvement_ratio": 0.9632246606051922, "flow/mag_ratio_mean": 0.5234426287934184, "flow/mag_ratio_std": 0.22044725203886628, "step": 17408 }, { "epoch": 0.18093941315261566, "eval_cos_loss": 0.45094432309269905, "eval_loss": 1.5633347816765308, "eval_mse_loss": 1.3378626182675362, "eval_runtime": 2.5802, "eval_samples_per_second": 775.125, "eval_steps_per_second": 12.402, "flow/cos_sim": 0.5490557141602039, "flow/improvement_ratio": 0.9632246606051922, "flow/mag_ratio_mean": 0.5234426287934184, "flow/mag_ratio_std": 0.22044725203886628, "step": 17408 }, { "epoch": 0.19158290804394598, "grad_norm": 0.15858766436576843, "learning_rate": 0.0009344210469473947, "loss": 1.5826770067214966, "step": 18432 }, { "epoch": 0.19158290804394598, "eval_cos_loss": 0.44898632261902094, "eval_loss": 1.5564597770571709, "eval_mse_loss": 1.331966608762741, "flow/cos_sim": 0.5510137844830751, "flow/improvement_ratio": 0.9625816307961941, "flow/mag_ratio_mean": 0.5298811597749591, "flow/mag_ratio_std": 0.22253544814884663, "step": 18432 }, { "epoch": 0.19158290804394598, "eval_cos_loss": 0.44898632261902094, "eval_loss": 1.5564597770571709, "eval_mse_loss": 1.331966608762741, "eval_runtime": 2.5531, "eval_samples_per_second": 783.347, "eval_steps_per_second": 12.534, "flow/cos_sim": 0.5510137844830751, "flow/improvement_ratio": 0.9625816307961941, "flow/mag_ratio_mean": 0.5298811597749591, "flow/mag_ratio_std": 0.22253544814884663, "step": 18432 }, { "epoch": 0.20222640293527633, "grad_norm": 0.2525703012943268, "learning_rate": 0.0009256133361993658, "loss": 1.5798900127410889, "step": 19456 }, { "epoch": 0.20222640293527633, "eval_cos_loss": 0.45141084399074316, "eval_loss": 1.567859135568142, "eval_mse_loss": 1.3421537093818188, "flow/cos_sim": 0.5485891196876764, "flow/improvement_ratio": 0.96523248963058, "flow/mag_ratio_mean": 0.5181732634082437, "flow/mag_ratio_std": 0.22030179109424353, "step": 19456 }, { "epoch": 0.20222640293527633, "eval_cos_loss": 0.45141084399074316, "eval_loss": 1.567859135568142, "eval_mse_loss": 1.3421537093818188, "eval_runtime": 2.5625, "eval_samples_per_second": 780.491, "eval_steps_per_second": 12.488, "flow/cos_sim": 0.5485891196876764, "flow/improvement_ratio": 0.96523248963058, "flow/mag_ratio_mean": 0.5181732634082437, "flow/mag_ratio_std": 0.22030179109424353, "step": 19456 }, { "epoch": 0.21286989782660665, "grad_norm": 0.18312996625900269, "learning_rate": 0.0009163072432159066, "loss": 1.579535961151123, "step": 20480 }, { "epoch": 0.21286989782660665, "eval_cos_loss": 0.45327545143663883, "eval_loss": 1.5679056644439697, "eval_mse_loss": 1.3412679433822632, "flow/cos_sim": 0.5467245355248451, "flow/improvement_ratio": 0.9616729654371738, "flow/mag_ratio_mean": 0.5273217614740133, "flow/mag_ratio_std": 0.2235504975542426, "step": 20480 }, { "epoch": 0.21286989782660665, "eval_cos_loss": 0.45327545143663883, "eval_loss": 1.5679056644439697, "eval_mse_loss": 1.3412679433822632, "eval_runtime": 2.5697, "eval_samples_per_second": 778.293, "eval_steps_per_second": 12.453, "flow/cos_sim": 0.5467245355248451, "flow/improvement_ratio": 0.9616729654371738, "flow/mag_ratio_mean": 0.5273217614740133, "flow/mag_ratio_std": 0.2235504975542426, "step": 20480 }, { "epoch": 0.223513392717937, "grad_norm": 0.21262691915035248, "learning_rate": 0.0009064956775190607, "loss": 1.577104926109314, "step": 21504 }, { "epoch": 0.223513392717937, "eval_cos_loss": 0.4483450762927532, "eval_loss": 1.553330171853304, "eval_mse_loss": 1.329157643020153, "flow/cos_sim": 0.5516549795866013, "flow/improvement_ratio": 0.9627660047262907, "flow/mag_ratio_mean": 0.5387043142691255, "flow/mag_ratio_std": 0.2251730626448989, "step": 21504 }, { "epoch": 0.223513392717937, "eval_cos_loss": 0.4483450762927532, "eval_loss": 1.553330171853304, "eval_mse_loss": 1.329157643020153, "eval_runtime": 2.5071, "eval_samples_per_second": 797.724, "eval_steps_per_second": 12.764, "flow/cos_sim": 0.5516549795866013, "flow/improvement_ratio": 0.9627660047262907, "flow/mag_ratio_mean": 0.5387043142691255, "flow/mag_ratio_std": 0.2251730626448989, "step": 21504 }, { "epoch": 0.2341568876092673, "grad_norm": 0.17988671362400055, "learning_rate": 0.0008961991942494195, "loss": 1.574266791343689, "step": 22528 }, { "epoch": 0.2341568876092673, "eval_cos_loss": 0.44411917496472597, "eval_loss": 1.543789055198431, "eval_mse_loss": 1.3217294700443745, "flow/cos_sim": 0.5558808352798223, "flow/improvement_ratio": 0.9671246875077486, "flow/mag_ratio_mean": 0.5348946927115321, "flow/mag_ratio_std": 0.22368196118623018, "step": 22528 }, { "epoch": 0.2341568876092673, "eval_cos_loss": 0.44411917496472597, "eval_loss": 1.543789055198431, "eval_mse_loss": 1.3217294700443745, "eval_runtime": 2.5967, "eval_samples_per_second": 770.215, "eval_steps_per_second": 12.323, "flow/cos_sim": 0.5558808352798223, "flow/improvement_ratio": 0.9671246875077486, "flow/mag_ratio_mean": 0.5348946927115321, "flow/mag_ratio_std": 0.22368196118623018, "step": 22528 }, { "epoch": 0.24480038250059766, "grad_norm": 0.22547593712806702, "learning_rate": 0.0008854408194461756, "loss": 1.5733323097229004, "step": 23552 }, { "epoch": 0.24480038250059766, "eval_cos_loss": 0.44172694999724627, "eval_loss": 1.5377833917737007, "eval_mse_loss": 1.316919919103384, "flow/cos_sim": 0.5582730043679476, "flow/improvement_ratio": 0.9642701335251331, "flow/mag_ratio_mean": 0.5346939843147993, "flow/mag_ratio_std": 0.22327208751812577, "step": 23552 }, { "epoch": 0.24480038250059766, "eval_cos_loss": 0.44172694999724627, "eval_loss": 1.5377833917737007, "eval_mse_loss": 1.316919919103384, "eval_runtime": 3.1395, "eval_samples_per_second": 637.04, "eval_steps_per_second": 10.193, "flow/cos_sim": 0.5582730043679476, "flow/improvement_ratio": 0.9642701335251331, "flow/mag_ratio_mean": 0.5346939843147993, "flow/mag_ratio_std": 0.22327208751812577, "step": 23552 }, { "epoch": 0.255443877391928, "grad_norm": 0.2300369143486023, "learning_rate": 0.0008742123561119935, "loss": 1.569944143295288, "step": 24576 }, { "epoch": 0.255443877391928, "eval_cos_loss": 0.447942478582263, "eval_loss": 1.553868442773819, "eval_mse_loss": 1.3298972100019455, "flow/cos_sim": 0.5520575055852532, "flow/improvement_ratio": 0.9638102632015944, "flow/mag_ratio_mean": 0.5306164929643273, "flow/mag_ratio_std": 0.22182104969397187, "step": 24576 }, { "epoch": 0.255443877391928, "eval_cos_loss": 0.447942478582263, "eval_loss": 1.553868442773819, "eval_mse_loss": 1.3298972100019455, "eval_runtime": 2.5886, "eval_samples_per_second": 772.612, "eval_steps_per_second": 12.362, "flow/cos_sim": 0.5520575055852532, "flow/improvement_ratio": 0.9638102632015944, "flow/mag_ratio_mean": 0.5306164929643273, "flow/mag_ratio_std": 0.22182104969397187, "step": 24576 }, { "epoch": 0.26608737228325835, "grad_norm": 0.2177908569574356, "learning_rate": 0.0008625491011983832, "loss": 1.5683772563934326, "step": 25600 }, { "epoch": 0.26608737228325835, "eval_cos_loss": 0.45118876080960035, "eval_loss": 1.5609249621629715, "eval_mse_loss": 1.3353305757045746, "flow/cos_sim": 0.5488111022859812, "flow/improvement_ratio": 0.9652206618338823, "flow/mag_ratio_mean": 0.5250881398096681, "flow/mag_ratio_std": 0.22340481635183096, "step": 25600 }, { "epoch": 0.26608737228325835, "eval_cos_loss": 0.45118876080960035, "eval_loss": 1.5609249621629715, "eval_mse_loss": 1.3353305757045746, "eval_runtime": 2.5832, "eval_samples_per_second": 774.241, "eval_steps_per_second": 12.388, "flow/cos_sim": 0.5488111022859812, "flow/improvement_ratio": 0.9652206618338823, "flow/mag_ratio_mean": 0.5250881398096681, "flow/mag_ratio_std": 0.22340481635183096, "step": 25600 }, { "epoch": 0.27673086717458867, "grad_norm": 0.13252000510692596, "learning_rate": 0.0008504421682637403, "loss": 1.5673582553863525, "step": 26624 }, { "epoch": 0.27673086717458867, "eval_cos_loss": 0.44563145097345114, "eval_loss": 1.5506689585745335, "eval_mse_loss": 1.3278532326221466, "flow/cos_sim": 0.5543686226010323, "flow/improvement_ratio": 0.9666622839868069, "flow/mag_ratio_mean": 0.5269411941990256, "flow/mag_ratio_std": 0.21877468656748533, "step": 26624 }, { "epoch": 0.27673086717458867, "eval_cos_loss": 0.44563145097345114, "eval_loss": 1.5506689585745335, "eval_mse_loss": 1.3278532326221466, "eval_runtime": 2.617, "eval_samples_per_second": 764.244, "eval_steps_per_second": 12.228, "flow/cos_sim": 0.5543686226010323, "flow/improvement_ratio": 0.9666622839868069, "flow/mag_ratio_mean": 0.5269411941990256, "flow/mag_ratio_std": 0.21877468656748533, "step": 26624 }, { "epoch": 0.287374362065919, "grad_norm": 0.2598721981048584, "learning_rate": 0.0008379296157504366, "loss": 1.564971923828125, "step": 27648 }, { "epoch": 0.287374362065919, "eval_cos_loss": 0.4419550793245435, "eval_loss": 1.5396056547760963, "eval_mse_loss": 1.3186281062662601, "flow/cos_sim": 0.5580449867993593, "flow/improvement_ratio": 0.9673310127109289, "flow/mag_ratio_mean": 0.5319117670878768, "flow/mag_ratio_std": 0.22065124148502946, "step": 27648 }, { "epoch": 0.287374362065919, "eval_cos_loss": 0.4419550793245435, "eval_loss": 1.5396056547760963, "eval_mse_loss": 1.3186281062662601, "eval_runtime": 3.0156, "eval_samples_per_second": 663.219, "eval_steps_per_second": 10.612, "flow/cos_sim": 0.5580449867993593, "flow/improvement_ratio": 0.9673310127109289, "flow/mag_ratio_mean": 0.5319117670878768, "flow/mag_ratio_std": 0.22065124148502946, "step": 27648 }, { "epoch": 0.2980178569572493, "grad_norm": 0.274239718914032, "learning_rate": 0.0008250147265053921, "loss": 1.564111590385437, "step": 28672 }, { "epoch": 0.2980178569572493, "eval_cos_loss": 0.44205061066895723, "eval_loss": 1.5401594452559948, "eval_mse_loss": 1.319134145975113, "flow/cos_sim": 0.5579493436962366, "flow/improvement_ratio": 0.9671094436198473, "flow/mag_ratio_mean": 0.5367719177156687, "flow/mag_ratio_std": 0.2205441533587873, "step": 28672 }, { "epoch": 0.2980178569572493, "eval_cos_loss": 0.44205061066895723, "eval_loss": 1.5401594452559948, "eval_mse_loss": 1.319134145975113, "eval_runtime": 2.6525, "eval_samples_per_second": 754.016, "eval_steps_per_second": 12.064, "flow/cos_sim": 0.5579493436962366, "flow/improvement_ratio": 0.9671094436198473, "flow/mag_ratio_mean": 0.5367719177156687, "flow/mag_ratio_std": 0.2205441533587873, "step": 28672 }, { "epoch": 0.3086613518485797, "grad_norm": 0.2095516473054886, "learning_rate": 0.000811699689939724, "loss": 1.5625946521759033, "step": 29696 }, { "epoch": 0.3086613518485797, "eval_cos_loss": 0.44621053244918585, "eval_loss": 1.5496392995119095, "eval_mse_loss": 1.3265340402722359, "flow/cos_sim": 0.5537894666194916, "flow/improvement_ratio": 0.9614951889961958, "flow/mag_ratio_mean": 0.5277672996744514, "flow/mag_ratio_std": 0.22011788561940193, "step": 29696 }, { "epoch": 0.3086613518485797, "eval_cos_loss": 0.44621053244918585, "eval_loss": 1.5496392995119095, "eval_mse_loss": 1.3265340402722359, "eval_runtime": 2.5404, "eval_samples_per_second": 787.269, "eval_steps_per_second": 12.596, "flow/cos_sim": 0.5537894666194916, "flow/improvement_ratio": 0.9614951889961958, "flow/mag_ratio_mean": 0.5277672996744514, "flow/mag_ratio_std": 0.22011788561940193, "step": 29696 }, { "epoch": 0.31930484673991, "grad_norm": 0.15808935463428497, "learning_rate": 0.0007980128200054721, "loss": 1.5646651983261108, "step": 30720 }, { "epoch": 0.31930484673991, "eval_cos_loss": 0.44148214533925056, "eval_loss": 1.5364415682852268, "eval_mse_loss": 1.3157004974782467, "flow/cos_sim": 0.5585179291665554, "flow/improvement_ratio": 0.9648495689034462, "flow/mag_ratio_mean": 0.537518884986639, "flow/mag_ratio_std": 0.22350562876090407, "step": 30720 }, { "epoch": 0.31930484673991, "eval_cos_loss": 0.44148214533925056, "eval_loss": 1.5364415682852268, "eval_mse_loss": 1.3157004974782467, "eval_runtime": 2.5821, "eval_samples_per_second": 774.564, "eval_steps_per_second": 12.393, "flow/cos_sim": 0.5585179291665554, "flow/improvement_ratio": 0.9648495689034462, "flow/mag_ratio_mean": 0.537518884986639, "flow/mag_ratio_std": 0.22350562876090407, "step": 30720 }, { "epoch": 0.3299483416312403, "grad_norm": 0.1921176314353943, "learning_rate": 0.0007839843253324567, "loss": 1.563474416732788, "step": 31744 }, { "epoch": 0.3299483416312403, "eval_cos_loss": 0.4404078619554639, "eval_loss": 1.534349039196968, "eval_mse_loss": 1.3141451105475426, "flow/cos_sim": 0.5595921669155359, "flow/improvement_ratio": 0.9649890139698982, "flow/mag_ratio_mean": 0.5285989735275507, "flow/mag_ratio_std": 0.22230371600016952, "step": 31744 }, { "epoch": 0.3299483416312403, "eval_cos_loss": 0.4404078619554639, "eval_loss": 1.534349039196968, "eval_mse_loss": 1.3141451105475426, "eval_runtime": 2.5419, "eval_samples_per_second": 786.808, "eval_steps_per_second": 12.589, "flow/cos_sim": 0.5595921669155359, "flow/improvement_ratio": 0.9649890139698982, "flow/mag_ratio_mean": 0.5285989735275507, "flow/mag_ratio_std": 0.22230371600016952, "step": 31744 }, { "epoch": 0.34059183652257063, "grad_norm": 0.19485324621200562, "learning_rate": 0.0007696035173607825, "loss": 1.5625982284545898, "step": 32768 }, { "epoch": 0.34059183652257063, "eval_cos_loss": 0.4499282343313098, "eval_loss": 1.5567349456250668, "eval_mse_loss": 1.3317708261311054, "flow/cos_sim": 0.5500718057155609, "flow/improvement_ratio": 0.9645203202962875, "flow/mag_ratio_mean": 0.5316947773098946, "flow/mag_ratio_std": 0.22561145247891545, "step": 32768 }, { "epoch": 0.34059183652257063, "eval_cos_loss": 0.4499282343313098, "eval_loss": 1.5567349456250668, "eval_mse_loss": 1.3317708261311054, "eval_runtime": 2.5397, "eval_samples_per_second": 787.504, "eval_steps_per_second": 12.6, "flow/cos_sim": 0.5500718057155609, "flow/improvement_ratio": 0.9645203202962875, "flow/mag_ratio_mean": 0.5316947773098946, "flow/mag_ratio_std": 0.22561145247891545, "step": 32768 }, { "epoch": 0.351235331413901, "grad_norm": 0.18854600191116333, "learning_rate": 0.0007549156025151, "loss": 1.5604270696640015, "step": 33792 }, { "epoch": 0.351235331413901, "eval_cos_loss": 0.44613189715892076, "eval_loss": 1.5455855540931225, "eval_mse_loss": 1.322519600391388, "flow/cos_sim": 0.5538681279867887, "flow/improvement_ratio": 0.9590773209929466, "flow/mag_ratio_mean": 0.5333189619705081, "flow/mag_ratio_std": 0.22768286149948835, "step": 33792 }, { "epoch": 0.351235331413901, "eval_cos_loss": 0.44613189715892076, "eval_loss": 1.5455855540931225, "eval_mse_loss": 1.322519600391388, "eval_runtime": 2.5511, "eval_samples_per_second": 783.987, "eval_steps_per_second": 12.544, "flow/cos_sim": 0.5538681279867887, "flow/improvement_ratio": 0.9590773209929466, "flow/mag_ratio_mean": 0.5333189619705081, "flow/mag_ratio_std": 0.22768286149948835, "step": 33792 }, { "epoch": 0.3618788263052313, "grad_norm": 0.1909618228673935, "learning_rate": 0.0007399093898115421, "loss": 1.559531807899475, "step": 34816 }, { "epoch": 0.3618788263052313, "eval_cos_loss": 0.4420803328976035, "eval_loss": 1.5389960557222366, "eval_mse_loss": 1.3179558925330639, "flow/cos_sim": 0.5579197406768799, "flow/improvement_ratio": 0.959646550938487, "flow/mag_ratio_mean": 0.5326429791748524, "flow/mag_ratio_std": 0.22111017350107431, "step": 34816 }, { "epoch": 0.3618788263052313, "eval_cos_loss": 0.4420803328976035, "eval_loss": 1.5389960557222366, "eval_mse_loss": 1.3179558925330639, "eval_runtime": 2.5395, "eval_samples_per_second": 787.558, "eval_steps_per_second": 12.601, "flow/cos_sim": 0.5579197406768799, "flow/improvement_ratio": 0.959646550938487, "flow/mag_ratio_mean": 0.5326429791748524, "flow/mag_ratio_std": 0.22111017350107431, "step": 34816 }, { "epoch": 0.37252232119656165, "grad_norm": 0.1629696786403656, "learning_rate": 0.0007246320516499633, "loss": 1.5580956935882568, "step": 35840 }, { "epoch": 0.37252232119656165, "eval_cos_loss": 0.44205798115581274, "eval_loss": 1.5348509810864925, "eval_mse_loss": 1.3138219900429249, "flow/cos_sim": 0.5579419694840908, "flow/improvement_ratio": 0.9621348176151514, "flow/mag_ratio_mean": 0.5318824276328087, "flow/mag_ratio_std": 0.226469362154603, "step": 35840 }, { "epoch": 0.37252232119656165, "eval_cos_loss": 0.44205798115581274, "eval_loss": 1.5348509810864925, "eval_mse_loss": 1.3138219900429249, "eval_runtime": 2.557, "eval_samples_per_second": 782.177, "eval_steps_per_second": 12.515, "flow/cos_sim": 0.5579419694840908, "flow/improvement_ratio": 0.9621348176151514, "flow/mag_ratio_mean": 0.5318824276328087, "flow/mag_ratio_std": 0.226469362154603, "step": 35840 }, { "epoch": 0.38316581608789196, "grad_norm": 0.2269536405801773, "learning_rate": 0.0007090719479543767, "loss": 1.557045578956604, "step": 36864 }, { "epoch": 0.38316581608789196, "eval_cos_loss": 0.4411419341340661, "eval_loss": 1.5364714972674847, "eval_mse_loss": 1.315900530666113, "flow/cos_sim": 0.5588581711053848, "flow/improvement_ratio": 0.965403363108635, "flow/mag_ratio_mean": 0.5407936815172434, "flow/mag_ratio_std": 0.22326642088592052, "step": 36864 }, { "epoch": 0.38316581608789196, "eval_cos_loss": 0.4411419341340661, "eval_loss": 1.5364714972674847, "eval_mse_loss": 1.315900530666113, "eval_runtime": 2.5319, "eval_samples_per_second": 789.932, "eval_steps_per_second": 12.639, "flow/cos_sim": 0.5588581711053848, "flow/improvement_ratio": 0.965403363108635, "flow/mag_ratio_mean": 0.5407936815172434, "flow/mag_ratio_std": 0.22326642088592052, "step": 36864 }, { "epoch": 0.39380931097922234, "grad_norm": 0.18450024724006653, "learning_rate": 0.0006932779922946351, "loss": 1.5578693151474, "step": 37888 }, { "epoch": 0.39380931097922234, "eval_cos_loss": 0.44131703954190016, "eval_loss": 1.5340029932558537, "eval_mse_loss": 1.3133444860577583, "flow/cos_sim": 0.5586829409003258, "flow/improvement_ratio": 0.9628860391676426, "flow/mag_ratio_mean": 0.537332147359848, "flow/mag_ratio_std": 0.22678001504391432, "step": 37888 }, { "epoch": 0.39380931097922234, "eval_cos_loss": 0.44131703954190016, "eval_loss": 1.5340029932558537, "eval_mse_loss": 1.3133444860577583, "eval_runtime": 2.555, "eval_samples_per_second": 782.783, "eval_steps_per_second": 12.525, "flow/cos_sim": 0.5586829409003258, "flow/improvement_ratio": 0.9628860391676426, "flow/mag_ratio_mean": 0.537332147359848, "flow/mag_ratio_std": 0.22678001504391432, "step": 37888 }, { "epoch": 0.40445280587055266, "grad_norm": 0.20654521882534027, "learning_rate": 0.0006772381509746807, "loss": 1.5568833351135254, "step": 38912 }, { "epoch": 0.40445280587055266, "eval_cos_loss": 0.4440508605912328, "eval_loss": 1.540926594287157, "eval_mse_loss": 1.3189011700451374, "flow/cos_sim": 0.5559491030871868, "flow/improvement_ratio": 0.964597575366497, "flow/mag_ratio_mean": 0.5335862170904875, "flow/mag_ratio_std": 0.22821834543719888, "step": 38912 }, { "epoch": 0.40445280587055266, "eval_cos_loss": 0.4440508605912328, "eval_loss": 1.540926594287157, "eval_mse_loss": 1.3189011700451374, "eval_runtime": 2.9265, "eval_samples_per_second": 683.411, "eval_steps_per_second": 10.935, "flow/cos_sim": 0.5559491030871868, "flow/improvement_ratio": 0.964597575366497, "flow/mag_ratio_mean": 0.5335862170904875, "flow/mag_ratio_std": 0.22821834543719888, "step": 38912 }, { "epoch": 0.415096300761883, "grad_norm": 0.20580987632274628, "learning_rate": 0.0006609868783930164, "loss": 1.5565650463104248, "step": 39936 }, { "epoch": 0.415096300761883, "eval_cos_loss": 0.4446534486487508, "eval_loss": 1.5423276983201504, "eval_mse_loss": 1.3200009688735008, "flow/cos_sim": 0.5553465932607651, "flow/improvement_ratio": 0.9601697400212288, "flow/mag_ratio_mean": 0.5308061949908733, "flow/mag_ratio_std": 0.22724535362794995, "step": 39936 }, { "epoch": 0.415096300761883, "eval_cos_loss": 0.4446534486487508, "eval_loss": 1.5423276983201504, "eval_mse_loss": 1.3200009688735008, "eval_runtime": 2.5315, "eval_samples_per_second": 790.058, "eval_steps_per_second": 12.641, "flow/cos_sim": 0.5553465932607651, "flow/improvement_ratio": 0.9601697400212288, "flow/mag_ratio_mean": 0.5308061949908733, "flow/mag_ratio_std": 0.22724535362794995, "step": 39936 }, { "epoch": 0.4257397956532133, "grad_norm": 0.17006264626979828, "learning_rate": 0.0006445597062966236, "loss": 1.5565887689590454, "step": 40960 }, { "epoch": 0.4257397956532133, "eval_cos_loss": 0.4380533881485462, "eval_loss": 1.5283529199659824, "eval_mse_loss": 1.3093262203037739, "flow/cos_sim": 0.5619466044008732, "flow/improvement_ratio": 0.9661596808582544, "flow/mag_ratio_mean": 0.5353248585015535, "flow/mag_ratio_std": 0.22140436619520187, "step": 40960 }, { "epoch": 0.4257397956532133, "eval_cos_loss": 0.4380533881485462, "eval_loss": 1.5283529199659824, "eval_mse_loss": 1.3093262203037739, "eval_runtime": 2.6667, "eval_samples_per_second": 749.988, "eval_steps_per_second": 12.0, "flow/cos_sim": 0.5619466044008732, "flow/improvement_ratio": 0.9661596808582544, "flow/mag_ratio_mean": 0.5353248585015535, "flow/mag_ratio_std": 0.22140436619520187, "step": 40960 }, { "epoch": 0.43638329054454367, "grad_norm": 0.2264794260263443, "learning_rate": 0.0006279604223844502, "loss": 1.5556617975234985, "step": 41984 }, { "epoch": 0.43638329054454367, "eval_cos_loss": 0.4411089513450861, "eval_loss": 1.5351563543081284, "eval_mse_loss": 1.3146018758416176, "flow/cos_sim": 0.5588910467922688, "flow/improvement_ratio": 0.9700996112078428, "flow/mag_ratio_mean": 0.535472328774631, "flow/mag_ratio_std": 0.2249652906320989, "step": 41984 }, { "epoch": 0.43638329054454367, "eval_cos_loss": 0.4411089513450861, "eval_loss": 1.5351563543081284, "eval_mse_loss": 1.3146018758416176, "eval_runtime": 2.6645, "eval_samples_per_second": 750.607, "eval_steps_per_second": 12.01, "flow/cos_sim": 0.5588910467922688, "flow/improvement_ratio": 0.9700996112078428, "flow/mag_ratio_mean": 0.535472328774631, "flow/mag_ratio_std": 0.2249652906320989, "step": 41984 }, { "epoch": 0.447026785435874, "grad_norm": 0.27407148480415344, "learning_rate": 0.0006111923466049098, "loss": 1.5525274276733398, "step": 43008 }, { "epoch": 0.447026785435874, "eval_cos_loss": 0.43889701180160046, "eval_loss": 1.5268253944814205, "eval_mse_loss": 1.3073768950998783, "flow/cos_sim": 0.561102925799787, "flow/improvement_ratio": 0.9647987205535173, "flow/mag_ratio_mean": 0.5332341426983476, "flow/mag_ratio_std": 0.22829985432326794, "step": 43008 }, { "epoch": 0.447026785435874, "eval_cos_loss": 0.43889701180160046, "eval_loss": 1.5268253944814205, "eval_mse_loss": 1.3073768950998783, "eval_runtime": 2.7939, "eval_samples_per_second": 715.846, "eval_steps_per_second": 11.454, "flow/cos_sim": 0.561102925799787, "flow/improvement_ratio": 0.9647987205535173, "flow/mag_ratio_mean": 0.5332341426983476, "flow/mag_ratio_std": 0.22829985432326794, "step": 43008 }, { "epoch": 0.4576702803272043, "grad_norm": 0.1399686485528946, "learning_rate": 0.0005942916270463306, "loss": 1.5545454025268555, "step": 44032 }, { "epoch": 0.4576702803272043, "eval_cos_loss": 0.44924431946128607, "eval_loss": 1.553151711821556, "eval_mse_loss": 1.3285295516252518, "flow/cos_sim": 0.550755743868649, "flow/improvement_ratio": 0.9646315854042768, "flow/mag_ratio_mean": 0.5285015730187297, "flow/mag_ratio_std": 0.22717531491070986, "step": 44032 }, { "epoch": 0.4576702803272043, "eval_cos_loss": 0.44924431946128607, "eval_loss": 1.553151711821556, "eval_mse_loss": 1.3285295516252518, "eval_runtime": 2.6015, "eval_samples_per_second": 768.789, "eval_steps_per_second": 12.301, "flow/cos_sim": 0.550755743868649, "flow/improvement_ratio": 0.9646315854042768, "flow/mag_ratio_mean": 0.5285015730187297, "flow/mag_ratio_std": 0.22717531491070986, "step": 44032 }, { "epoch": 0.4683137752185346, "grad_norm": 0.17331954836845398, "learning_rate": 0.0005772784249462365, "loss": 1.5531715154647827, "step": 45056 }, { "epoch": 0.4683137752185346, "eval_cos_loss": 0.44831305276602507, "eval_loss": 1.55314514413476, "eval_mse_loss": 1.328988615423441, "flow/cos_sim": 0.5516869705170393, "flow/improvement_ratio": 0.9640028644353151, "flow/mag_ratio_mean": 0.53109060972929, "flow/mag_ratio_std": 0.22524388320744038, "step": 45056 }, { "epoch": 0.4683137752185346, "eval_cos_loss": 0.44831305276602507, "eval_loss": 1.55314514413476, "eval_mse_loss": 1.328988615423441, "eval_runtime": 2.6383, "eval_samples_per_second": 758.058, "eval_steps_per_second": 12.129, "flow/cos_sim": 0.5516869705170393, "flow/improvement_ratio": 0.9640028644353151, "flow/mag_ratio_mean": 0.53109060972929, "flow/mag_ratio_std": 0.22524388320744038, "step": 45056 }, { "epoch": 0.478957270109865, "grad_norm": 0.20901691913604736, "learning_rate": 0.0005601730357250316, "loss": 1.5534908771514893, "step": 46080 }, { "epoch": 0.478957270109865, "eval_cos_loss": 0.4380967328324914, "eval_loss": 1.5263510905206203, "eval_mse_loss": 1.3073027282953262, "flow/cos_sim": 0.5619033649563789, "flow/improvement_ratio": 0.9646317362785339, "flow/mag_ratio_mean": 0.5367344031110406, "flow/mag_ratio_std": 0.22356789046898484, "step": 46080 }, { "epoch": 0.478957270109865, "eval_cos_loss": 0.4380967328324914, "eval_loss": 1.5263510905206203, "eval_mse_loss": 1.3073027282953262, "eval_runtime": 3.1077, "eval_samples_per_second": 643.555, "eval_steps_per_second": 10.297, "flow/cos_sim": 0.5619033649563789, "flow/improvement_ratio": 0.9646317362785339, "flow/mag_ratio_mean": 0.5367344031110406, "flow/mag_ratio_std": 0.22356789046898484, "step": 46080 }, { "epoch": 0.4896007650011953, "grad_norm": 0.18614411354064941, "learning_rate": 0.0005430126677168879, "loss": 1.5517550706863403, "step": 47104 }, { "epoch": 0.4896007650011953, "eval_cos_loss": 0.4381159236654639, "eval_loss": 1.5279735252261162, "eval_mse_loss": 1.308915562927723, "flow/cos_sim": 0.5618840865790844, "flow/improvement_ratio": 0.9676383044570684, "flow/mag_ratio_mean": 0.5347359916195273, "flow/mag_ratio_std": 0.22025129199028015, "step": 47104 }, { "epoch": 0.4896007650011953, "eval_cos_loss": 0.4381159236654639, "eval_loss": 1.5279735252261162, "eval_mse_loss": 1.308915562927723, "eval_runtime": 2.5963, "eval_samples_per_second": 770.338, "eval_steps_per_second": 12.325, "flow/cos_sim": 0.5618840865790844, "flow/improvement_ratio": 0.9676383044570684, "flow/mag_ratio_mean": 0.5347359916195273, "flow/mag_ratio_std": 0.22025129199028015, "step": 47104 }, { "epoch": 0.5002442598925256, "grad_norm": 0.20864352583885193, "learning_rate": 0.0005257842461318475, "loss": 1.5500738620758057, "step": 48128 }, { "epoch": 0.5002442598925256, "eval_cos_loss": 0.4427802488207817, "eval_loss": 1.5378683991730213, "eval_mse_loss": 1.3164782784879208, "flow/cos_sim": 0.5572197437286377, "flow/improvement_ratio": 0.9660468604415655, "flow/mag_ratio_mean": 0.5358876623213291, "flow/mag_ratio_std": 0.22517190361395478, "step": 48128 }, { "epoch": 0.5002442598925256, "eval_cos_loss": 0.4427802488207817, "eval_loss": 1.5378683991730213, "eval_mse_loss": 1.3164782784879208, "eval_runtime": 2.7741, "eval_samples_per_second": 720.965, "eval_steps_per_second": 11.535, "flow/cos_sim": 0.5572197437286377, "flow/improvement_ratio": 0.9660468604415655, "flow/mag_ratio_mean": 0.5358876623213291, "flow/mag_ratio_std": 0.22517190361395478, "step": 48128 }, { "epoch": 0.510887754783856, "grad_norm": 0.18199937045574188, "learning_rate": 0.0005085250659563913, "loss": 1.5504491329193115, "step": 49152 }, { "epoch": 0.510887754783856, "eval_cos_loss": 0.4387433025985956, "eval_loss": 1.5289665646851063, "eval_mse_loss": 1.309594914317131, "flow/cos_sim": 0.561256805434823, "flow/improvement_ratio": 0.9640381913632154, "flow/mag_ratio_mean": 0.5361321400851011, "flow/mag_ratio_std": 0.22544911736622453, "step": 49152 }, { "epoch": 0.510887754783856, "eval_cos_loss": 0.4387433025985956, "eval_loss": 1.5289665646851063, "eval_mse_loss": 1.309594914317131, "eval_runtime": 2.5843, "eval_samples_per_second": 773.896, "eval_steps_per_second": 12.382, "flow/cos_sim": 0.561256805434823, "flow/improvement_ratio": 0.9640381913632154, "flow/mag_ratio_mean": 0.5361321400851011, "flow/mag_ratio_std": 0.22544911736622453, "step": 49152 }, { "epoch": 0.5215312496751863, "grad_norm": 0.21871572732925415, "learning_rate": 0.0004912557160435426, "loss": 1.551537036895752, "step": 50176 }, { "epoch": 0.5215312496751863, "eval_cos_loss": 0.4424938661977649, "eval_loss": 1.5380571633577347, "eval_mse_loss": 1.316810242831707, "flow/cos_sim": 0.5575060974806547, "flow/improvement_ratio": 0.9636888317763805, "flow/mag_ratio_mean": 0.5369405504316092, "flow/mag_ratio_std": 0.22526462702080607, "step": 50176 }, { "epoch": 0.5215312496751863, "eval_cos_loss": 0.4424938661977649, "eval_loss": 1.5380571633577347, "eval_mse_loss": 1.316810242831707, "eval_runtime": 2.6421, "eval_samples_per_second": 756.983, "eval_steps_per_second": 12.112, "flow/cos_sim": 0.5575060974806547, "flow/improvement_ratio": 0.9636888317763805, "flow/mag_ratio_mean": 0.5369405504316092, "flow/mag_ratio_std": 0.22526462702080607, "step": 50176 }, { "epoch": 0.5321747445665167, "grad_norm": 0.19360916316509247, "learning_rate": 0.000474013640007982, "loss": 1.550221562385559, "step": 51200 }, { "epoch": 0.5321747445665167, "eval_cos_loss": 0.44469246733933687, "eval_loss": 1.5433855392038822, "eval_mse_loss": 1.3210392966866493, "flow/cos_sim": 0.5553075838834047, "flow/improvement_ratio": 0.9643128626048565, "flow/mag_ratio_mean": 0.5328638143837452, "flow/mag_ratio_std": 0.2267028819769621, "step": 51200 }, { "epoch": 0.5321747445665167, "eval_cos_loss": 0.44469246733933687, "eval_loss": 1.5433855392038822, "eval_mse_loss": 1.3210392966866493, "eval_runtime": 2.5901, "eval_samples_per_second": 772.182, "eval_steps_per_second": 12.355, "flow/cos_sim": 0.5553075838834047, "flow/improvement_ratio": 0.9643128626048565, "flow/mag_ratio_mean": 0.5328638143837452, "flow/mag_ratio_std": 0.2267028819769621, "step": 51200 }, { "epoch": 0.542818239457847, "grad_norm": 0.25140267610549927, "learning_rate": 0.0004567857008049507, "loss": 1.5501980781555176, "step": 52224 }, { "epoch": 0.542818239457847, "eval_cos_loss": 0.44003486074507236, "eval_loss": 1.5340061485767365, "eval_mse_loss": 1.3139887191355228, "flow/cos_sim": 0.5599651224911213, "flow/improvement_ratio": 0.9662698730826378, "flow/mag_ratio_mean": 0.5311327101662755, "flow/mag_ratio_std": 0.21956392657011747, "step": 52224 }, { "epoch": 0.542818239457847, "eval_cos_loss": 0.44003486074507236, "eval_loss": 1.5340061485767365, "eval_mse_loss": 1.3139887191355228, "eval_runtime": 2.5514, "eval_samples_per_second": 783.875, "eval_steps_per_second": 12.542, "flow/cos_sim": 0.5599651224911213, "flow/improvement_ratio": 0.9662698730826378, "flow/mag_ratio_mean": 0.5311327101662755, "flow/mag_ratio_std": 0.21956392657011747, "step": 52224 }, { "epoch": 0.5534617343491773, "grad_norm": 0.1874593198299408, "learning_rate": 0.0004396260548863663, "loss": 1.5494704246520996, "step": 53248 }, { "epoch": 0.5534617343491773, "eval_cos_loss": 0.4375583464279771, "eval_loss": 1.5273119732737541, "eval_mse_loss": 1.3085327930748463, "flow/cos_sim": 0.5624416321516037, "flow/improvement_ratio": 0.9654307011514902, "flow/mag_ratio_mean": 0.5390398278832436, "flow/mag_ratio_std": 0.2204155451618135, "step": 53248 }, { "epoch": 0.5534617343491773, "eval_cos_loss": 0.4375583464279771, "eval_loss": 1.5273119732737541, "eval_mse_loss": 1.3085327930748463, "eval_runtime": 2.9751, "eval_samples_per_second": 672.236, "eval_steps_per_second": 10.756, "flow/cos_sim": 0.5624416321516037, "flow/improvement_ratio": 0.9654307011514902, "flow/mag_ratio_mean": 0.5390398278832436, "flow/mag_ratio_std": 0.2204155451618135, "step": 53248 }, { "epoch": 0.5641052292405077, "grad_norm": 0.260960191488266, "learning_rate": 0.000422521628012444, "loss": 1.5488193035125732, "step": 54272 }, { "epoch": 0.5641052292405077, "eval_cos_loss": 0.4423528155311942, "eval_loss": 1.5401365533471107, "eval_mse_loss": 1.318960152566433, "flow/cos_sim": 0.5576471537351608, "flow/improvement_ratio": 0.9645018931478262, "flow/mag_ratio_mean": 0.5321623589843512, "flow/mag_ratio_std": 0.22070467984303832, "step": 54272 }, { "epoch": 0.5641052292405077, "eval_cos_loss": 0.4423528155311942, "eval_loss": 1.5401365533471107, "eval_mse_loss": 1.318960152566433, "eval_runtime": 2.7006, "eval_samples_per_second": 740.582, "eval_steps_per_second": 11.849, "flow/cos_sim": 0.5576471537351608, "flow/improvement_ratio": 0.9645018931478262, "flow/mag_ratio_mean": 0.5321623589843512, "flow/mag_ratio_std": 0.22070467984303832, "step": 54272 }, { "epoch": 0.574748724131838, "grad_norm": 0.18275974690914154, "learning_rate": 0.00040552618837104806, "loss": 1.5481247901916504, "step": 55296 }, { "epoch": 0.574748724131838, "eval_cos_loss": 0.4375903755426407, "eval_loss": 1.52578229829669, "eval_mse_loss": 1.3069871068000793, "flow/cos_sim": 0.5624096170067787, "flow/improvement_ratio": 0.9644604399800301, "flow/mag_ratio_mean": 0.5396961104124784, "flow/mag_ratio_std": 0.22439152654260397, "step": 55296 }, { "epoch": 0.574748724131838, "eval_cos_loss": 0.4375903755426407, "eval_loss": 1.52578229829669, "eval_mse_loss": 1.3069871068000793, "eval_runtime": 2.5932, "eval_samples_per_second": 771.259, "eval_steps_per_second": 12.34, "flow/cos_sim": 0.5624096170067787, "flow/improvement_ratio": 0.9644604399800301, "flow/mag_ratio_mean": 0.5396961104124784, "flow/mag_ratio_std": 0.22439152654260397, "step": 55296 }, { "epoch": 0.5853922190231683, "grad_norm": 0.19556212425231934, "learning_rate": 0.00038862678683408014, "loss": 1.548845648765564, "step": 56320 }, { "epoch": 0.5853922190231683, "eval_cos_loss": 0.44385355431586504, "eval_loss": 1.5408159419894218, "eval_mse_loss": 1.3188891597092152, "flow/cos_sim": 0.5561465304344893, "flow/improvement_ratio": 0.9655895195901394, "flow/mag_ratio_mean": 0.5379520216956735, "flow/mag_ratio_std": 0.22702415706589818, "step": 56320 }, { "epoch": 0.5853922190231683, "eval_cos_loss": 0.44385355431586504, "eval_loss": 1.5408159419894218, "eval_mse_loss": 1.3188891597092152, "eval_runtime": 2.811, "eval_samples_per_second": 711.502, "eval_steps_per_second": 11.384, "flow/cos_sim": 0.5561465304344893, "flow/improvement_ratio": 0.9655895195901394, "flow/mag_ratio_mean": 0.5379520216956735, "flow/mag_ratio_std": 0.22702415706589818, "step": 56320 }, { "epoch": 0.5960357139144986, "grad_norm": 0.154473677277565, "learning_rate": 0.00037187654708719937, "loss": 1.5476142168045044, "step": 57344 }, { "epoch": 0.5960357139144986, "eval_cos_loss": 0.44051590468734503, "eval_loss": 1.5316696986556053, "eval_mse_loss": 1.3114117458462715, "flow/cos_sim": 0.5594841903075576, "flow/improvement_ratio": 0.9659801628440619, "flow/mag_ratio_mean": 0.5374981416389346, "flow/mag_ratio_std": 0.2266282932832837, "step": 57344 }, { "epoch": 0.5960357139144986, "eval_cos_loss": 0.44051590468734503, "eval_loss": 1.5316696986556053, "eval_mse_loss": 1.3114117458462715, "eval_runtime": 2.9501, "eval_samples_per_second": 677.94, "eval_steps_per_second": 10.847, "flow/cos_sim": 0.5594841903075576, "flow/improvement_ratio": 0.9659801628440619, "flow/mag_ratio_mean": 0.5374981416389346, "flow/mag_ratio_std": 0.2266282932832837, "step": 57344 }, { "epoch": 0.6066792088058289, "grad_norm": 0.19920258224010468, "learning_rate": 0.00035526270682447326, "loss": 1.546189785003662, "step": 58368 }, { "epoch": 0.6066792088058289, "eval_cos_loss": 0.45052160415798426, "eval_loss": 1.5585628859698772, "eval_mse_loss": 1.333302080631256, "flow/cos_sim": 0.5494783949106932, "flow/improvement_ratio": 0.9626638870686293, "flow/mag_ratio_mean": 0.5275079058483243, "flow/mag_ratio_std": 0.22685475973412395, "step": 58368 }, { "epoch": 0.6066792088058289, "eval_cos_loss": 0.45052160415798426, "eval_loss": 1.5585628859698772, "eval_mse_loss": 1.333302080631256, "eval_runtime": 2.5748, "eval_samples_per_second": 776.758, "eval_steps_per_second": 12.428, "flow/cos_sim": 0.5494783949106932, "flow/improvement_ratio": 0.9626638870686293, "flow/mag_ratio_mean": 0.5275079058483243, "flow/mag_ratio_std": 0.22685475973412395, "step": 58368 }, { "epoch": 0.6173227036971594, "grad_norm": 0.15236690640449524, "learning_rate": 0.0003388374920626505, "loss": 1.546614408493042, "step": 59392 }, { "epoch": 0.6173227036971594, "eval_cos_loss": 0.44333774503320456, "eval_loss": 1.5386833399534225, "eval_mse_loss": 1.3170144706964493, "flow/cos_sim": 0.5566621888428926, "flow/improvement_ratio": 0.9644087161868811, "flow/mag_ratio_mean": 0.536553805693984, "flow/mag_ratio_std": 0.22319983318448067, "step": 59392 }, { "epoch": 0.6173227036971594, "eval_cos_loss": 0.44333774503320456, "eval_loss": 1.5386833399534225, "eval_mse_loss": 1.3170144706964493, "eval_runtime": 2.5638, "eval_samples_per_second": 780.079, "eval_steps_per_second": 12.481, "flow/cos_sim": 0.5566621888428926, "flow/improvement_ratio": 0.9644087161868811, "flow/mag_ratio_mean": 0.536553805693984, "flow/mag_ratio_std": 0.22319983318448067, "step": 59392 }, { "epoch": 0.6279661985884897, "grad_norm": 0.20959317684173584, "learning_rate": 0.0003225883881381223, "loss": 1.5455403327941895, "step": 60416 }, { "epoch": 0.6279661985884897, "eval_cos_loss": 0.4418268231675029, "eval_loss": 1.5369167998433113, "eval_mse_loss": 1.3160033896565437, "flow/cos_sim": 0.5581732373684645, "flow/improvement_ratio": 0.9641634412109852, "flow/mag_ratio_mean": 0.5371082350611687, "flow/mag_ratio_std": 0.22345659835264087, "step": 60416 }, { "epoch": 0.6279661985884897, "eval_cos_loss": 0.4418268231675029, "eval_loss": 1.5369167998433113, "eval_mse_loss": 1.3160033896565437, "eval_runtime": 3.0215, "eval_samples_per_second": 661.926, "eval_steps_per_second": 10.591, "flow/cos_sim": 0.5581732373684645, "flow/improvement_ratio": 0.9641634412109852, "flow/mag_ratio_mean": 0.5371082350611687, "flow/mag_ratio_std": 0.22345659835264087, "step": 60416 }, { "epoch": 0.63860969347982, "grad_norm": 0.14656518399715424, "learning_rate": 0.00030656647450995957, "loss": 1.5472609996795654, "step": 61440 }, { "epoch": 0.63860969347982, "eval_cos_loss": 0.4388773338869214, "eval_loss": 1.5277933366596699, "eval_mse_loss": 1.308354664593935, "flow/cos_sim": 0.5611227098852396, "flow/improvement_ratio": 0.9615725018084049, "flow/mag_ratio_mean": 0.5404465068131685, "flow/mag_ratio_std": 0.22536235908046365, "step": 61440 }, { "epoch": 0.63860969347982, "eval_cos_loss": 0.4388773338869214, "eval_loss": 1.5277933366596699, "eval_mse_loss": 1.308354664593935, "eval_runtime": 2.725, "eval_samples_per_second": 733.933, "eval_steps_per_second": 11.743, "flow/cos_sim": 0.5611227098852396, "flow/improvement_ratio": 0.9615725018084049, "flow/mag_ratio_mean": 0.5404465068131685, "flow/mag_ratio_std": 0.22536235908046365, "step": 61440 }, { "epoch": 0.6492531883711503, "grad_norm": 0.16984502971172333, "learning_rate": 0.0002907595437968689, "loss": 1.546900987625122, "step": 62464 }, { "epoch": 0.6492531883711503, "eval_cos_loss": 0.44708416890352964, "eval_loss": 1.550130132585764, "eval_mse_loss": 1.3265880458056927, "flow/cos_sim": 0.5529158180579543, "flow/improvement_ratio": 0.964706290513277, "flow/mag_ratio_mean": 0.5327735636383295, "flow/mag_ratio_std": 0.22531110560521483, "step": 62464 }, { "epoch": 0.6492531883711503, "eval_cos_loss": 0.44708416890352964, "eval_loss": 1.550130132585764, "eval_mse_loss": 1.3265880458056927, "eval_runtime": 2.9226, "eval_samples_per_second": 684.317, "eval_steps_per_second": 10.949, "flow/cos_sim": 0.5529158180579543, "flow/improvement_ratio": 0.964706290513277, "flow/mag_ratio_mean": 0.5327735636383295, "flow/mag_ratio_std": 0.22531110560521483, "step": 62464 }, { "epoch": 0.6598966832624806, "grad_norm": 0.16193066537380219, "learning_rate": 0.00027521728547552225, "loss": 1.5464342832565308, "step": 63488 }, { "epoch": 0.6598966832624806, "eval_cos_loss": 0.44685271102935076, "eval_loss": 1.5440139174461365, "eval_mse_loss": 1.3205875717103481, "flow/cos_sim": 0.5531473197042942, "flow/improvement_ratio": 0.9620692692697048, "flow/mag_ratio_mean": 0.5350175518542528, "flow/mag_ratio_std": 0.23371326178312302, "step": 63488 }, { "epoch": 0.6598966832624806, "eval_cos_loss": 0.44685271102935076, "eval_loss": 1.5440139174461365, "eval_mse_loss": 1.3205875717103481, "eval_runtime": 2.588, "eval_samples_per_second": 772.809, "eval_steps_per_second": 12.365, "flow/cos_sim": 0.5531473197042942, "flow/improvement_ratio": 0.9620692692697048, "flow/mag_ratio_mean": 0.5350175518542528, "flow/mag_ratio_std": 0.23371326178312302, "step": 63488 }, { "epoch": 0.670540178153811, "grad_norm": 0.15779773890972137, "learning_rate": 0.00025992785762254565, "loss": 1.545114278793335, "step": 64512 }, { "epoch": 0.670540178153811, "eval_cos_loss": 0.4412845829501748, "eval_loss": 1.5324720852077007, "eval_mse_loss": 1.311829797923565, "flow/cos_sim": 0.5587154757231474, "flow/improvement_ratio": 0.9654579609632492, "flow/mag_ratio_mean": 0.5386016722768545, "flow/mag_ratio_std": 0.22591237584128976, "step": 64512 }, { "epoch": 0.670540178153811, "eval_cos_loss": 0.4412845829501748, "eval_loss": 1.5324720852077007, "eval_mse_loss": 1.311829797923565, "eval_runtime": 2.5359, "eval_samples_per_second": 788.675, "eval_steps_per_second": 12.619, "flow/cos_sim": 0.5587154757231474, "flow/improvement_ratio": 0.9654579609632492, "flow/mag_ratio_mean": 0.5386016722768545, "flow/mag_ratio_std": 0.22591237584128976, "step": 64512 }, { "epoch": 0.6811836730451413, "grad_norm": 0.13823935389518738, "learning_rate": 0.00024493932293160253, "loss": 1.5480190515518188, "step": 65536 }, { "epoch": 0.6811836730451413, "eval_cos_loss": 0.441369004547596, "eval_loss": 1.5341729335486889, "eval_mse_loss": 1.3134884350001812, "flow/cos_sim": 0.5586309880018234, "flow/improvement_ratio": 0.9630204811692238, "flow/mag_ratio_mean": 0.5338549390435219, "flow/mag_ratio_std": 0.2280183294788003, "step": 65536 }, { "epoch": 0.6811836730451413, "eval_cos_loss": 0.441369004547596, "eval_loss": 1.5341729335486889, "eval_mse_loss": 1.3134884350001812, "eval_runtime": 2.5158, "eval_samples_per_second": 794.978, "eval_steps_per_second": 12.72, "flow/cos_sim": 0.5586309880018234, "flow/improvement_ratio": 0.9630204811692238, "flow/mag_ratio_mean": 0.5338549390435219, "flow/mag_ratio_std": 0.2280183294788003, "step": 65536 }, { "epoch": 0.6918271679364716, "grad_norm": 0.15880072116851807, "learning_rate": 0.00023024026137121335, "loss": 1.5464882850646973, "step": 66560 }, { "epoch": 0.6918271679364716, "eval_cos_loss": 0.4457838609814644, "eval_loss": 1.543992355465889, "eval_mse_loss": 1.3211004212498665, "flow/cos_sim": 0.554216225631535, "flow/improvement_ratio": 0.9604913741350174, "flow/mag_ratio_mean": 0.5307466676458716, "flow/mag_ratio_std": 0.22845542011782527, "step": 66560 }, { "epoch": 0.6918271679364716, "eval_cos_loss": 0.4457838609814644, "eval_loss": 1.543992355465889, "eval_mse_loss": 1.3211004212498665, "eval_runtime": 2.821, "eval_samples_per_second": 708.957, "eval_steps_per_second": 11.343, "flow/cos_sim": 0.554216225631535, "flow/improvement_ratio": 0.9604913741350174, "flow/mag_ratio_mean": 0.5307466676458716, "flow/mag_ratio_std": 0.22845542011782527, "step": 66560 }, { "epoch": 0.702470662827802, "grad_norm": 0.16780522465705872, "learning_rate": 0.0002158768798040357, "loss": 1.5465317964553833, "step": 67584 }, { "epoch": 0.702470662827802, "eval_cos_loss": 0.4425074281170964, "eval_loss": 1.5348311252892017, "eval_mse_loss": 1.3135774172842503, "flow/cos_sim": 0.557492695748806, "flow/improvement_ratio": 0.9596080742776394, "flow/mag_ratio_mean": 0.5359784308820963, "flow/mag_ratio_std": 0.22894051391631365, "step": 67584 }, { "epoch": 0.702470662827802, "eval_cos_loss": 0.4425074281170964, "eval_loss": 1.5348311252892017, "eval_mse_loss": 1.3135774172842503, "eval_runtime": 3.1093, "eval_samples_per_second": 643.239, "eval_steps_per_second": 10.292, "flow/cos_sim": 0.557492695748806, "flow/improvement_ratio": 0.9596080742776394, "flow/mag_ratio_mean": 0.5359784308820963, "flow/mag_ratio_std": 0.22894051391631365, "step": 67584 }, { "epoch": 0.7131141577191323, "grad_norm": 0.14971283078193665, "learning_rate": 0.0002018382345138871, "loss": 1.5448497533798218, "step": 68608 }, { "epoch": 0.7131141577191323, "eval_cos_loss": 0.4460556413978338, "eval_loss": 1.544582311064005, "eval_mse_loss": 1.3215544857084751, "flow/cos_sim": 0.5539443735033274, "flow/improvement_ratio": 0.9611221943050623, "flow/mag_ratio_mean": 0.5354085844010115, "flow/mag_ratio_std": 0.23012477485463023, "step": 68608 }, { "epoch": 0.7131141577191323, "eval_cos_loss": 0.4460556413978338, "eval_loss": 1.544582311064005, "eval_mse_loss": 1.3215544857084751, "eval_runtime": 2.5817, "eval_samples_per_second": 774.689, "eval_steps_per_second": 12.395, "flow/cos_sim": 0.5539443735033274, "flow/improvement_ratio": 0.9611221943050623, "flow/mag_ratio_mean": 0.5354085844010115, "flow/mag_ratio_std": 0.23012477485463023, "step": 68608 }, { "epoch": 0.7237576526104627, "grad_norm": 0.14716410636901855, "learning_rate": 0.00018816845632858647, "loss": 1.5454319715499878, "step": 69632 }, { "epoch": 0.7237576526104627, "eval_cos_loss": 0.4387012077495456, "eval_loss": 1.527622751891613, "eval_mse_loss": 1.3082721531391144, "flow/cos_sim": 0.5612988974899054, "flow/improvement_ratio": 0.9638887159526348, "flow/mag_ratio_mean": 0.5365499863401055, "flow/mag_ratio_std": 0.2257627071812749, "step": 69632 }, { "epoch": 0.7237576526104627, "eval_cos_loss": 0.4387012077495456, "eval_loss": 1.527622751891613, "eval_mse_loss": 1.3082721531391144, "eval_runtime": 2.5421, "eval_samples_per_second": 786.736, "eval_steps_per_second": 12.588, "flow/cos_sim": 0.5612988974899054, "flow/improvement_ratio": 0.9638887159526348, "flow/mag_ratio_mean": 0.5365499863401055, "flow/mag_ratio_std": 0.2257627071812749, "step": 69632 }, { "epoch": 0.734401147501793, "grad_norm": 0.1667480319738388, "learning_rate": 0.00017485713000071836, "loss": 1.5442575216293335, "step": 70656 }, { "epoch": 0.734401147501793, "eval_cos_loss": 0.44433039985597134, "eval_loss": 1.540200136601925, "eval_mse_loss": 1.3180349357426167, "flow/cos_sim": 0.5556696448475122, "flow/improvement_ratio": 0.9625885803252459, "flow/mag_ratio_mean": 0.5337581913918257, "flow/mag_ratio_std": 0.22786249266937375, "step": 70656 }, { "epoch": 0.734401147501793, "eval_cos_loss": 0.44433039985597134, "eval_loss": 1.540200136601925, "eval_mse_loss": 1.3180349357426167, "eval_runtime": 2.5514, "eval_samples_per_second": 783.877, "eval_steps_per_second": 12.542, "flow/cos_sim": 0.5556696448475122, "flow/improvement_ratio": 0.9625885803252459, "flow/mag_ratio_mean": 0.5337581913918257, "flow/mag_ratio_std": 0.22786249266937375, "step": 70656 }, { "epoch": 0.7450446423931233, "grad_norm": 0.21780510246753693, "learning_rate": 0.00016194610001300043, "loss": 1.5446763038635254, "step": 71680 }, { "epoch": 0.7450446423931233, "eval_cos_loss": 0.44072897639125586, "eval_loss": 1.53380636125803, "eval_mse_loss": 1.3134418688714504, "flow/cos_sim": 0.559271028265357, "flow/improvement_ratio": 0.9642956424504519, "flow/mag_ratio_mean": 0.5349069200456142, "flow/mag_ratio_std": 0.22263477742671967, "step": 71680 }, { "epoch": 0.7450446423931233, "eval_cos_loss": 0.44072897639125586, "eval_loss": 1.53380636125803, "eval_mse_loss": 1.3134418688714504, "eval_runtime": 2.544, "eval_samples_per_second": 786.162, "eval_steps_per_second": 12.579, "flow/cos_sim": 0.559271028265357, "flow/improvement_ratio": 0.9642956424504519, "flow/mag_ratio_mean": 0.5349069200456142, "flow/mag_ratio_std": 0.22263477742671967, "step": 71680 }, { "epoch": 0.7556881372844536, "grad_norm": 0.21069595217704773, "learning_rate": 0.0001494255292217801, "loss": 1.544022560119629, "step": 72704 }, { "epoch": 0.7556881372844536, "eval_cos_loss": 0.44525754544883966, "eval_loss": 1.5455412901937962, "eval_mse_loss": 1.322912521660328, "flow/cos_sim": 0.5547424238175154, "flow/improvement_ratio": 0.9653947055339813, "flow/mag_ratio_mean": 0.5336724892258644, "flow/mag_ratio_std": 0.22635432425886393, "step": 72704 }, { "epoch": 0.7556881372844536, "eval_cos_loss": 0.44525754544883966, "eval_loss": 1.5455412901937962, "eval_mse_loss": 1.322912521660328, "eval_runtime": 2.8381, "eval_samples_per_second": 704.708, "eval_steps_per_second": 11.275, "flow/cos_sim": 0.5547424238175154, "flow/improvement_ratio": 0.9653947055339813, "flow/mag_ratio_mean": 0.5336724892258644, "flow/mag_ratio_std": 0.22635432425886393, "step": 72704 }, { "epoch": 0.7663316321757839, "grad_norm": 0.13283193111419678, "learning_rate": 0.0001373347763502365, "loss": 1.545114517211914, "step": 73728 }, { "epoch": 0.7663316321757839, "eval_cos_loss": 0.44415116030722857, "eval_loss": 1.5426055118441582, "eval_mse_loss": 1.320529941469431, "flow/cos_sim": 0.5558489598333836, "flow/improvement_ratio": 0.960325175896287, "flow/mag_ratio_mean": 0.5346721205860376, "flow/mag_ratio_std": 0.2243386204354465, "step": 73728 }, { "epoch": 0.7663316321757839, "eval_cos_loss": 0.44415116030722857, "eval_loss": 1.5426055118441582, "eval_mse_loss": 1.320529941469431, "eval_runtime": 2.5354, "eval_samples_per_second": 788.834, "eval_steps_per_second": 12.621, "flow/cos_sim": 0.5558489598333836, "flow/improvement_ratio": 0.960325175896287, "flow/mag_ratio_mean": 0.5346721205860376, "flow/mag_ratio_std": 0.2243386204354465, "step": 73728 }, { "epoch": 0.7769751270671144, "grad_norm": 0.13968615233898163, "learning_rate": 0.00012566462923845807, "loss": 1.5433732271194458, "step": 74752 }, { "epoch": 0.7769751270671144, "eval_cos_loss": 0.44200514908879995, "eval_loss": 1.5366779565811157, "eval_mse_loss": 1.3156753852963448, "flow/cos_sim": 0.5579948425292969, "flow/improvement_ratio": 0.9636132828891277, "flow/mag_ratio_mean": 0.5333766378462315, "flow/mag_ratio_std": 0.22553266119211912, "step": 74752 }, { "epoch": 0.7769751270671144, "eval_cos_loss": 0.44200514908879995, "eval_loss": 1.5366779565811157, "eval_mse_loss": 1.3156753852963448, "eval_runtime": 2.4816, "eval_samples_per_second": 805.947, "eval_steps_per_second": 12.895, "flow/cos_sim": 0.5579948425292969, "flow/improvement_ratio": 0.9636132828891277, "flow/mag_ratio_mean": 0.5333766378462315, "flow/mag_ratio_std": 0.22553266119211912, "step": 74752 }, { "epoch": 0.7876186219584447, "grad_norm": 0.22951993346214294, "learning_rate": 0.00011445177328180711, "loss": 1.5437688827514648, "step": 75776 }, { "epoch": 0.7876186219584447, "eval_cos_loss": 0.44082553684711456, "eval_loss": 1.5322747267782688, "eval_mse_loss": 1.3118619658052921, "flow/cos_sim": 0.5591745115816593, "flow/improvement_ratio": 0.9597889501601458, "flow/mag_ratio_mean": 0.5374172441661358, "flow/mag_ratio_std": 0.22841465286910534, "step": 75776 }, { "epoch": 0.7876186219584447, "eval_cos_loss": 0.44082553684711456, "eval_loss": 1.5322747267782688, "eval_mse_loss": 1.3118619658052921, "eval_runtime": 2.5567, "eval_samples_per_second": 782.259, "eval_steps_per_second": 12.516, "flow/cos_sim": 0.5591745115816593, "flow/improvement_ratio": 0.9597889501601458, "flow/mag_ratio_mean": 0.5374172441661358, "flow/mag_ratio_std": 0.22841465286910534, "step": 75776 }, { "epoch": 0.798262116849775, "grad_norm": 0.15128082036972046, "learning_rate": 0.00010369794845541591, "loss": 1.545216679573059, "step": 76800 }, { "epoch": 0.798262116849775, "eval_cos_loss": 0.4396012471988797, "eval_loss": 1.5277335830032825, "eval_mse_loss": 1.3079329580068588, "flow/cos_sim": 0.5603987323120236, "flow/improvement_ratio": 0.9618115201592445, "flow/mag_ratio_mean": 0.5371274519711733, "flow/mag_ratio_std": 0.22797544300556183, "step": 76800 }, { "epoch": 0.798262116849775, "eval_cos_loss": 0.4396012471988797, "eval_loss": 1.5277335830032825, "eval_mse_loss": 1.3079329580068588, "eval_runtime": 2.4926, "eval_samples_per_second": 802.376, "eval_steps_per_second": 12.838, "flow/cos_sim": 0.5603987323120236, "flow/improvement_ratio": 0.9618115201592445, "flow/mag_ratio_mean": 0.5371274519711733, "flow/mag_ratio_std": 0.22797544300556183, "step": 76800 }, { "epoch": 0.8089056117411053, "grad_norm": 0.15258397161960602, "learning_rate": 9.340614224932947e-05, "loss": 1.5437705516815186, "step": 77824 }, { "epoch": 0.8089056117411053, "eval_cos_loss": 0.4406488761305809, "eval_loss": 1.5305031947791576, "eval_mse_loss": 1.3101787567138672, "flow/cos_sim": 0.5593511275947094, "flow/improvement_ratio": 0.9616372548043728, "flow/mag_ratio_mean": 0.5346422707661986, "flow/mag_ratio_std": 0.22846621181815863, "step": 77824 }, { "epoch": 0.8089056117411053, "eval_cos_loss": 0.4406488761305809, "eval_loss": 1.5305031947791576, "eval_mse_loss": 1.3101787567138672, "eval_runtime": 2.5281, "eval_samples_per_second": 791.111, "eval_steps_per_second": 12.658, "flow/cos_sim": 0.5593511275947094, "flow/improvement_ratio": 0.9616372548043728, "flow/mag_ratio_mean": 0.5346422707661986, "flow/mag_ratio_std": 0.22846621181815863, "step": 77824 }, { "epoch": 0.8195491066324356, "grad_norm": 0.15012474358081818, "learning_rate": 8.359937073738122e-05, "loss": 1.5427945852279663, "step": 78848 }, { "epoch": 0.8195491066324356, "eval_cos_loss": 0.4398349104449153, "eval_loss": 1.5277978368103504, "eval_mse_loss": 1.3078803904354572, "flow/cos_sim": 0.5601652916520834, "flow/improvement_ratio": 0.9623467661440372, "flow/mag_ratio_mean": 0.5370206441730261, "flow/mag_ratio_std": 0.2306741690263152, "step": 78848 }, { "epoch": 0.8195491066324356, "eval_cos_loss": 0.4398349104449153, "eval_loss": 1.5277978368103504, "eval_mse_loss": 1.3078803904354572, "eval_runtime": 2.9061, "eval_samples_per_second": 688.21, "eval_steps_per_second": 11.011, "flow/cos_sim": 0.5601652916520834, "flow/improvement_ratio": 0.9623467661440372, "flow/mag_ratio_mean": 0.5370206441730261, "flow/mag_ratio_std": 0.2306741690263152, "step": 78848 }, { "epoch": 0.830192601523766, "grad_norm": 0.14434155821800232, "learning_rate": 7.429817849801124e-05, "loss": 1.5424107313156128, "step": 79872 }, { "epoch": 0.830192601523766, "eval_cos_loss": 0.440770055167377, "eval_loss": 1.5335130989551544, "eval_mse_loss": 1.31312807649374, "flow/cos_sim": 0.5592298936098814, "flow/improvement_ratio": 0.9679525289684534, "flow/mag_ratio_mean": 0.5366982752457261, "flow/mag_ratio_std": 0.22420579148456454, "step": 79872 }, { "epoch": 0.830192601523766, "eval_cos_loss": 0.440770055167377, "eval_loss": 1.5335130989551544, "eval_mse_loss": 1.31312807649374, "eval_runtime": 2.4978, "eval_samples_per_second": 800.693, "eval_steps_per_second": 12.811, "flow/cos_sim": 0.5592298936098814, "flow/improvement_ratio": 0.9679525289684534, "flow/mag_ratio_mean": 0.5366982752457261, "flow/mag_ratio_std": 0.22420579148456454, "step": 79872 }, { "epoch": 0.8408360964150963, "grad_norm": 0.14048728346824646, "learning_rate": 6.54954787872275e-05, "loss": 1.5442392826080322, "step": 80896 }, { "epoch": 0.8408360964150963, "eval_cos_loss": 0.44281749427318573, "eval_loss": 1.5380274765193462, "eval_mse_loss": 1.3166187293827534, "flow/cos_sim": 0.5571825094521046, "flow/improvement_ratio": 0.9661159794777632, "flow/mag_ratio_mean": 0.5346010681241751, "flow/mag_ratio_std": 0.22787420498207211, "step": 80896 }, { "epoch": 0.8408360964150963, "eval_cos_loss": 0.44281749427318573, "eval_loss": 1.5380274765193462, "eval_mse_loss": 1.3166187293827534, "eval_runtime": 2.5057, "eval_samples_per_second": 798.189, "eval_steps_per_second": 12.771, "flow/cos_sim": 0.5571825094521046, "flow/improvement_ratio": 0.9661159794777632, "flow/mag_ratio_mean": 0.5346010681241751, "flow/mag_ratio_std": 0.22787420498207211, "step": 80896 }, { "epoch": 0.8514795913064266, "grad_norm": 0.15351833403110504, "learning_rate": 5.7211109010678276e-05, "loss": 1.5441709756851196, "step": 81920 }, { "epoch": 0.8514795913064266, "eval_cos_loss": 0.43972852267324924, "eval_loss": 1.5298539474606514, "eval_mse_loss": 1.3099896907806396, "flow/cos_sim": 0.5602715257555246, "flow/improvement_ratio": 0.9654016513377428, "flow/mag_ratio_mean": 0.5356091465801001, "flow/mag_ratio_std": 0.22713992185890675, "step": 81920 }, { "epoch": 0.8514795913064266, "eval_cos_loss": 0.43972852267324924, "eval_loss": 1.5298539474606514, "eval_mse_loss": 1.3099896907806396, "eval_runtime": 3.0577, "eval_samples_per_second": 654.077, "eval_steps_per_second": 10.465, "flow/cos_sim": 0.5602715257555246, "flow/improvement_ratio": 0.9654016513377428, "flow/mag_ratio_mean": 0.5356091465801001, "flow/mag_ratio_std": 0.22713992185890675, "step": 81920 }, { "epoch": 0.862123086197757, "grad_norm": 0.14501118659973145, "learning_rate": 4.946226542264676e-05, "loss": 1.5440622568130493, "step": 82944 }, { "epoch": 0.862123086197757, "eval_cos_loss": 0.4398494055494666, "eval_loss": 1.5285302698612213, "eval_mse_loss": 1.3086055591702461, "flow/cos_sim": 0.5601506568491459, "flow/improvement_ratio": 0.9636457152664661, "flow/mag_ratio_mean": 0.5373753281310201, "flow/mag_ratio_std": 0.22595488466322422, "step": 82944 }, { "epoch": 0.862123086197757, "eval_cos_loss": 0.4398494055494666, "eval_loss": 1.5285302698612213, "eval_mse_loss": 1.3086055591702461, "eval_runtime": 2.6735, "eval_samples_per_second": 748.087, "eval_steps_per_second": 11.969, "flow/cos_sim": 0.5601506568491459, "flow/improvement_ratio": 0.9636457152664661, "flow/mag_ratio_mean": 0.5373753281310201, "flow/mag_ratio_std": 0.22595488466322422, "step": 82944 }, { "epoch": 0.8727665810890873, "grad_norm": 0.1732260137796402, "learning_rate": 4.2243044041220535e-05, "loss": 1.5427820682525635, "step": 83968 }, { "epoch": 0.8727665810890873, "eval_cos_loss": 0.43963075149804354, "eval_loss": 1.5304800160229206, "eval_mse_loss": 1.310664638876915, "flow/cos_sim": 0.5603692829608917, "flow/improvement_ratio": 0.9656369760632515, "flow/mag_ratio_mean": 0.5373333236202598, "flow/mag_ratio_std": 0.22598782274872065, "step": 83968 }, { "epoch": 0.8727665810890873, "eval_cos_loss": 0.43963075149804354, "eval_loss": 1.5304800160229206, "eval_mse_loss": 1.310664638876915, "eval_runtime": 2.6771, "eval_samples_per_second": 747.086, "eval_steps_per_second": 11.953, "flow/cos_sim": 0.5603692829608917, "flow/improvement_ratio": 0.9656369760632515, "flow/mag_ratio_mean": 0.5373333236202598, "flow/mag_ratio_std": 0.22598782274872065, "step": 83968 }, { "epoch": 0.8834100759804177, "grad_norm": 0.1780228167772293, "learning_rate": 3.556989093134011e-05, "loss": 1.5418999195098877, "step": 84992 }, { "epoch": 0.8834100759804177, "eval_cos_loss": 0.4417334571480751, "eval_loss": 1.5349605418741703, "eval_mse_loss": 1.3140938207507133, "flow/cos_sim": 0.5582665763795376, "flow/improvement_ratio": 0.9667846951633692, "flow/mag_ratio_mean": 0.5365295764058828, "flow/mag_ratio_std": 0.22713460819795728, "step": 84992 }, { "epoch": 0.8834100759804177, "eval_cos_loss": 0.4417334571480751, "eval_loss": 1.5349605418741703, "eval_mse_loss": 1.3140938207507133, "eval_runtime": 2.545, "eval_samples_per_second": 785.844, "eval_steps_per_second": 12.574, "flow/cos_sim": 0.5582665763795376, "flow/improvement_ratio": 0.9667846951633692, "flow/mag_ratio_mean": 0.5365295764058828, "flow/mag_ratio_std": 0.22713460819795728, "step": 84992 }, { "epoch": 0.894053570871748, "grad_norm": 0.1589423269033432, "learning_rate": 2.9450766643112025e-05, "loss": 1.5413941144943237, "step": 86016 }, { "epoch": 0.894053570871748, "eval_cos_loss": 0.4417336033657193, "eval_loss": 1.5350174084305763, "eval_mse_loss": 1.314150609076023, "flow/cos_sim": 0.5582663975656033, "flow/improvement_ratio": 0.9610403534024954, "flow/mag_ratio_mean": 0.5371948201209307, "flow/mag_ratio_std": 0.22706548869609833, "step": 86016 }, { "epoch": 0.894053570871748, "eval_cos_loss": 0.4417336033657193, "eval_loss": 1.5350174084305763, "eval_mse_loss": 1.314150609076023, "eval_runtime": 2.516, "eval_samples_per_second": 794.928, "eval_steps_per_second": 12.719, "flow/cos_sim": 0.5582663975656033, "flow/improvement_ratio": 0.9610403534024954, "flow/mag_ratio_mean": 0.5371948201209307, "flow/mag_ratio_std": 0.22706548869609833, "step": 86016 }, { "epoch": 0.9046970657630783, "grad_norm": 0.13952629268169403, "learning_rate": 2.3898122320136107e-05, "loss": 1.5443463325500488, "step": 87040 }, { "epoch": 0.9046970657630783, "eval_cos_loss": 0.4454851495102048, "eval_loss": 1.5462815202772617, "eval_mse_loss": 1.3235389403998852, "flow/cos_sim": 0.5545149501413107, "flow/improvement_ratio": 0.9633045084774494, "flow/mag_ratio_mean": 0.5327793834730983, "flow/mag_ratio_std": 0.22570591513067484, "step": 87040 }, { "epoch": 0.9046970657630783, "eval_cos_loss": 0.4454851495102048, "eval_loss": 1.5462815202772617, "eval_mse_loss": 1.3235389403998852, "eval_runtime": 3.0237, "eval_samples_per_second": 661.435, "eval_steps_per_second": 10.583, "flow/cos_sim": 0.5545149501413107, "flow/improvement_ratio": 0.9633045084774494, "flow/mag_ratio_mean": 0.5327793834730983, "flow/mag_ratio_std": 0.22570591513067484, "step": 87040 }, { "epoch": 0.9153405606544086, "grad_norm": 0.1696067899465561, "learning_rate": 1.8907727303827394e-05, "loss": 1.5422346591949463, "step": 88064 }, { "epoch": 0.9153405606544086, "eval_cos_loss": 0.4382868492975831, "eval_loss": 1.5258447527885437, "eval_mse_loss": 1.3067013174295425, "flow/cos_sim": 0.561713146045804, "flow/improvement_ratio": 0.9594295676797628, "flow/mag_ratio_mean": 0.5373271182179451, "flow/mag_ratio_std": 0.22556039178743958, "step": 88064 }, { "epoch": 0.9153405606544086, "eval_cos_loss": 0.4382868492975831, "eval_loss": 1.5258447527885437, "eval_mse_loss": 1.3067013174295425, "eval_runtime": 2.5211, "eval_samples_per_second": 793.311, "eval_steps_per_second": 12.693, "flow/cos_sim": 0.561713146045804, "flow/improvement_ratio": 0.9594295676797628, "flow/mag_ratio_mean": 0.5373271182179451, "flow/mag_ratio_std": 0.22556039178743958, "step": 88064 }, { "epoch": 0.9259840555457389, "grad_norm": 0.15072369575500488, "learning_rate": 1.4491237768113841e-05, "loss": 1.542992353439331, "step": 89088 }, { "epoch": 0.9259840555457389, "eval_cos_loss": 0.4420606214553118, "eval_loss": 1.5344204120337963, "eval_mse_loss": 1.3133900947868824, "flow/cos_sim": 0.557939387857914, "flow/improvement_ratio": 0.9637042284011841, "flow/mag_ratio_mean": 0.5341464914381504, "flow/mag_ratio_std": 0.22699455870315433, "step": 89088 }, { "epoch": 0.9259840555457389, "eval_cos_loss": 0.4420606214553118, "eval_loss": 1.5344204120337963, "eval_mse_loss": 1.3133900947868824, "eval_runtime": 2.613, "eval_samples_per_second": 765.401, "eval_steps_per_second": 12.246, "flow/cos_sim": 0.557939387857914, "flow/improvement_ratio": 0.9637042284011841, "flow/mag_ratio_mean": 0.5341464914381504, "flow/mag_ratio_std": 0.22699455870315433, "step": 89088 }, { "epoch": 0.9366275504370692, "grad_norm": 0.15198417007923126, "learning_rate": 1.065392223983186e-05, "loss": 1.5422855615615845, "step": 90112 }, { "epoch": 0.9366275504370692, "eval_cos_loss": 0.4458601539954543, "eval_loss": 1.5451230816543102, "eval_mse_loss": 1.3221930228173733, "flow/cos_sim": 0.5541399177163839, "flow/improvement_ratio": 0.958930304273963, "flow/mag_ratio_mean": 0.5324209975078702, "flow/mag_ratio_std": 0.22846948402002454, "step": 90112 }, { "epoch": 0.9366275504370692, "eval_cos_loss": 0.4458601539954543, "eval_loss": 1.5451230816543102, "eval_mse_loss": 1.3221930228173733, "eval_runtime": 3.0844, "eval_samples_per_second": 648.42, "eval_steps_per_second": 10.375, "flow/cos_sim": 0.5541399177163839, "flow/improvement_ratio": 0.958930304273963, "flow/mag_ratio_mean": 0.5324209975078702, "flow/mag_ratio_std": 0.22846948402002454, "step": 90112 }, { "epoch": 0.9472710453283997, "grad_norm": 0.1283935010433197, "learning_rate": 7.40324956991506e-06, "loss": 1.5435974597930908, "step": 91136 }, { "epoch": 0.9472710453283997, "eval_cos_loss": 0.4411879302933812, "eval_loss": 1.5341791696846485, "eval_mse_loss": 1.313585203140974, "flow/cos_sim": 0.5588121470063925, "flow/improvement_ratio": 0.9638758208602667, "flow/mag_ratio_mean": 0.5351903941482306, "flow/mag_ratio_std": 0.22519566072151065, "step": 91136 }, { "epoch": 0.9472710453283997, "eval_cos_loss": 0.4411879302933812, "eval_loss": 1.5341791696846485, "eval_mse_loss": 1.313585203140974, "eval_runtime": 2.6762, "eval_samples_per_second": 747.335, "eval_steps_per_second": 11.957, "flow/cos_sim": 0.5588121470063925, "flow/improvement_ratio": 0.9638758208602667, "flow/mag_ratio_mean": 0.5351903941482306, "flow/mag_ratio_std": 0.22519566072151065, "step": 91136 }, { "epoch": 0.95791454021973, "grad_norm": 0.12574529647827148, "learning_rate": 4.7367430129720004e-06, "loss": 1.5435535907745361, "step": 92160 }, { "epoch": 0.95791454021973, "eval_cos_loss": 0.44530233182013035, "eval_loss": 1.5450147837400436, "eval_mse_loss": 1.3223636075854301, "flow/cos_sim": 0.5546976234763861, "flow/improvement_ratio": 0.9661824498325586, "flow/mag_ratio_mean": 0.5322676496580243, "flow/mag_ratio_std": 0.22808025730773807, "step": 92160 }, { "epoch": 0.95791454021973, "eval_cos_loss": 0.44530233182013035, "eval_loss": 1.5450147837400436, "eval_mse_loss": 1.3223636075854301, "eval_runtime": 2.5148, "eval_samples_per_second": 795.3, "eval_steps_per_second": 12.725, "flow/cos_sim": 0.5546976234763861, "flow/improvement_ratio": 0.9661824498325586, "flow/mag_ratio_mean": 0.5322676496580243, "flow/mag_ratio_std": 0.22808025730773807, "step": 92160 }, { "epoch": 0.9685580351110603, "grad_norm": 0.1402869075536728, "learning_rate": 2.662784798150131e-06, "loss": 1.5427674055099487, "step": 93184 }, { "epoch": 0.9685580351110603, "eval_cos_loss": 0.44235736317932606, "eval_loss": 1.536139328032732, "eval_mse_loss": 1.3149606361985207, "flow/cos_sim": 0.5576427038758993, "flow/improvement_ratio": 0.9603242203593254, "flow/mag_ratio_mean": 0.5352574419230223, "flow/mag_ratio_std": 0.2266941787675023, "step": 93184 }, { "epoch": 0.9685580351110603, "eval_cos_loss": 0.44235736317932606, "eval_loss": 1.536139328032732, "eval_mse_loss": 1.3149606361985207, "eval_runtime": 2.565, "eval_samples_per_second": 779.726, "eval_steps_per_second": 12.476, "flow/cos_sim": 0.5576427038758993, "flow/improvement_ratio": 0.9603242203593254, "flow/mag_ratio_mean": 0.5352574419230223, "flow/mag_ratio_std": 0.2266941787675023, "step": 93184 }, { "epoch": 0.9792015300023906, "grad_norm": 0.1338053047657013, "learning_rate": 1.1797947397548802e-06, "loss": 1.5422078371047974, "step": 94208 }, { "epoch": 0.9792015300023906, "eval_cos_loss": 0.44181027822196484, "eval_loss": 1.5355971939861774, "eval_mse_loss": 1.3146920576691628, "flow/cos_sim": 0.558189669623971, "flow/improvement_ratio": 0.9626132287085056, "flow/mag_ratio_mean": 0.5348945092409849, "flow/mag_ratio_std": 0.22780301421880722, "step": 94208 }, { "epoch": 0.9792015300023906, "eval_cos_loss": 0.44181027822196484, "eval_loss": 1.5355971939861774, "eval_mse_loss": 1.3146920576691628, "eval_runtime": 2.4969, "eval_samples_per_second": 800.987, "eval_steps_per_second": 12.816, "flow/cos_sim": 0.558189669623971, "flow/improvement_ratio": 0.9626132287085056, "flow/mag_ratio_mean": 0.5348945092409849, "flow/mag_ratio_std": 0.22780301421880722, "step": 94208 }, { "epoch": 0.989845024893721, "grad_norm": 0.12129372358322144, "learning_rate": 2.9243465362199797e-07, "loss": 1.541589379310608, "step": 95232 }, { "epoch": 0.989845024893721, "eval_cos_loss": 0.44525294937193394, "eval_loss": 1.5437418557703495, "eval_mse_loss": 1.3211153745651245, "flow/cos_sim": 0.5547471418976784, "flow/improvement_ratio": 0.9627930391579866, "flow/mag_ratio_mean": 0.5311954086646438, "flow/mag_ratio_std": 0.2286191936582327, "step": 95232 }, { "epoch": 0.989845024893721, "eval_cos_loss": 0.44525294937193394, "eval_loss": 1.5437418557703495, "eval_mse_loss": 1.3211153745651245, "eval_runtime": 2.5197, "eval_samples_per_second": 793.751, "eval_steps_per_second": 12.7, "flow/cos_sim": 0.5547471418976784, "flow/improvement_ratio": 0.9627930391579866, "flow/mag_ratio_mean": 0.5311954086646438, "flow/mag_ratio_std": 0.2286191936582327, "step": 95232 } ], "logging_steps": 1024, "max_steps": 96209, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1024, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }