| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 1024, |
| "global_step": 96209, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.010643494891330332, |
| "grad_norm": 0.13342437148094177, |
| "learning_rate": 0.0003330078125, |
| "loss": 2.2998437881469727, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.010643494891330332, |
| "eval_cos_loss": 0.5988449528813362, |
| "eval_loss": 1.9600126259028912, |
| "eval_mse_loss": 1.6605901420116425, |
| "flow/cos_sim": 0.4011551085859537, |
| "flow/improvement_ratio": 0.942170824855566, |
| "flow/mag_ratio_mean": 0.37856073677539825, |
| "flow/mag_ratio_std": 0.14085532305762172, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.010643494891330332, |
| "eval_cos_loss": 0.5988449528813362, |
| "eval_loss": 1.9600126259028912, |
| "eval_mse_loss": 1.6605901420116425, |
| "eval_runtime": 2.6584, |
| "eval_samples_per_second": 752.329, |
| "eval_steps_per_second": 12.037, |
| "flow/cos_sim": 0.4011551085859537, |
| "flow/improvement_ratio": 0.942170824855566, |
| "flow/mag_ratio_mean": 0.37856073677539825, |
| "flow/mag_ratio_std": 0.14085532305762172, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.021286989782660665, |
| "grad_norm": 0.25054192543029785, |
| "learning_rate": 0.0006663411458333333, |
| "loss": 1.8492329120635986, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.021286989782660665, |
| "eval_cos_loss": 0.5117531130090356, |
| "eval_loss": 1.7429817728698254, |
| "eval_mse_loss": 1.4871052131056786, |
| "flow/cos_sim": 0.4882468534633517, |
| "flow/improvement_ratio": 0.9563530795276165, |
| "flow/mag_ratio_mean": 0.47669631242752075, |
| "flow/mag_ratio_std": 0.17675806442275643, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.021286989782660665, |
| "eval_cos_loss": 0.5117531130090356, |
| "eval_loss": 1.7429817728698254, |
| "eval_mse_loss": 1.4871052131056786, |
| "eval_runtime": 2.511, |
| "eval_samples_per_second": 796.509, |
| "eval_steps_per_second": 12.744, |
| "flow/cos_sim": 0.4882468534633517, |
| "flow/improvement_ratio": 0.9563530795276165, |
| "flow/mag_ratio_mean": 0.47669631242752075, |
| "flow/mag_ratio_std": 0.17675806442275643, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.031930484673991, |
| "grad_norm": 0.30941224098205566, |
| "learning_rate": 0.0009996744791666667, |
| "loss": 1.730944037437439, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.031930484673991, |
| "eval_cos_loss": 0.4815286351367831, |
| "eval_loss": 1.6586528308689594, |
| "eval_mse_loss": 1.4178885221481323, |
| "flow/cos_sim": 0.5184714393690228, |
| "flow/improvement_ratio": 0.9605911839753389, |
| "flow/mag_ratio_mean": 0.49818364903330803, |
| "flow/mag_ratio_std": 0.1928270636126399, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.031930484673991, |
| "eval_cos_loss": 0.4815286351367831, |
| "eval_loss": 1.6586528308689594, |
| "eval_mse_loss": 1.4178885221481323, |
| "eval_runtime": 3.1033, |
| "eval_samples_per_second": 644.485, |
| "eval_steps_per_second": 10.312, |
| "flow/cos_sim": 0.5184714393690228, |
| "flow/improvement_ratio": 0.9605911839753389, |
| "flow/mag_ratio_mean": 0.49818364903330803, |
| "flow/mag_ratio_std": 0.1928270636126399, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.04257397956532133, |
| "grad_norm": 0.22964967787265778, |
| "learning_rate": 0.0009997023516784352, |
| "loss": 1.6850833892822266, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.04257397956532133, |
| "eval_cos_loss": 0.476364528760314, |
| "eval_loss": 1.6391540355980396, |
| "eval_mse_loss": 1.4009717665612698, |
| "flow/cos_sim": 0.5236354488879442, |
| "flow/improvement_ratio": 0.9618693646043539, |
| "flow/mag_ratio_mean": 0.5105963433161378, |
| "flow/mag_ratio_std": 0.20592432795092463, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.04257397956532133, |
| "eval_cos_loss": 0.476364528760314, |
| "eval_loss": 1.6391540355980396, |
| "eval_mse_loss": 1.4009717665612698, |
| "eval_runtime": 2.5129, |
| "eval_samples_per_second": 795.895, |
| "eval_steps_per_second": 12.734, |
| "flow/cos_sim": 0.5236354488879442, |
| "flow/improvement_ratio": 0.9618693646043539, |
| "flow/mag_ratio_mean": 0.5105963433161378, |
| "flow/mag_ratio_std": 0.20592432795092463, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.05321747445665166, |
| "grad_norm": 0.2645546495914459, |
| "learning_rate": 0.0009988085977910004, |
| "loss": 1.6617510318756104, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.05321747445665166, |
| "eval_cos_loss": 0.4789119102060795, |
| "eval_loss": 1.645260013639927, |
| "eval_mse_loss": 1.405804067850113, |
| "flow/cos_sim": 0.5210880534723401, |
| "flow/improvement_ratio": 0.9588682930916548, |
| "flow/mag_ratio_mean": 0.504288200289011, |
| "flow/mag_ratio_std": 0.20718340016901493, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.05321747445665166, |
| "eval_cos_loss": 0.4789119102060795, |
| "eval_loss": 1.645260013639927, |
| "eval_mse_loss": 1.405804067850113, |
| "eval_runtime": 3.0976, |
| "eval_samples_per_second": 645.654, |
| "eval_steps_per_second": 10.33, |
| "flow/cos_sim": 0.5210880534723401, |
| "flow/improvement_ratio": 0.9588682930916548, |
| "flow/mag_ratio_mean": 0.504288200289011, |
| "flow/mag_ratio_std": 0.20718340016901493, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.063860969347982, |
| "grad_norm": 0.2762889862060547, |
| "learning_rate": 0.0009973198042317873, |
| "loss": 1.645796775817871, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.063860969347982, |
| "eval_cos_loss": 0.4598818449303508, |
| "eval_loss": 1.5948525853455067, |
| "eval_mse_loss": 1.3649116680026054, |
| "flow/cos_sim": 0.5401182025671005, |
| "flow/improvement_ratio": 0.9647715575993061, |
| "flow/mag_ratio_mean": 0.5178880272433162, |
| "flow/mag_ratio_std": 0.21153279254212976, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.063860969347982, |
| "eval_cos_loss": 0.4598818449303508, |
| "eval_loss": 1.5948525853455067, |
| "eval_mse_loss": 1.3649116680026054, |
| "eval_runtime": 3.0831, |
| "eval_samples_per_second": 648.695, |
| "eval_steps_per_second": 10.379, |
| "flow/cos_sim": 0.5401182025671005, |
| "flow/improvement_ratio": 0.9647715575993061, |
| "flow/mag_ratio_mean": 0.5178880272433162, |
| "flow/mag_ratio_std": 0.21153279254212976, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.07450446423931233, |
| "grad_norm": 0.17679959535598755, |
| "learning_rate": 0.0009952377470151526, |
| "loss": 1.6353809833526611, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.07450446423931233, |
| "eval_cos_loss": 0.4634226718917489, |
| "eval_loss": 1.6022505089640617, |
| "eval_mse_loss": 1.3705391697585583, |
| "flow/cos_sim": 0.5365773290395737, |
| "flow/improvement_ratio": 0.9635819494724274, |
| "flow/mag_ratio_mean": 0.5194354858249426, |
| "flow/mag_ratio_std": 0.21515046246349812, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.07450446423931233, |
| "eval_cos_loss": 0.4634226718917489, |
| "eval_loss": 1.6022505089640617, |
| "eval_mse_loss": 1.3705391697585583, |
| "eval_runtime": 2.8419, |
| "eval_samples_per_second": 703.759, |
| "eval_steps_per_second": 11.26, |
| "flow/cos_sim": 0.5365773290395737, |
| "flow/improvement_ratio": 0.9635819494724274, |
| "flow/mag_ratio_mean": 0.5194354858249426, |
| "flow/mag_ratio_std": 0.21515046246349812, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.08514795913064266, |
| "grad_norm": 0.14975515007972717, |
| "learning_rate": 0.000992564909872628, |
| "loss": 1.6262034177780151, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.08514795913064266, |
| "eval_cos_loss": 0.45912545546889305, |
| "eval_loss": 1.5890175811946392, |
| "eval_mse_loss": 1.3594548553228378, |
| "flow/cos_sim": 0.5408745482563972, |
| "flow/improvement_ratio": 0.9590303134173155, |
| "flow/mag_ratio_mean": 0.5143361240625381, |
| "flow/mag_ratio_std": 0.21537457825616002, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.08514795913064266, |
| "eval_cos_loss": 0.45912545546889305, |
| "eval_loss": 1.5890175811946392, |
| "eval_mse_loss": 1.3594548553228378, |
| "eval_runtime": 2.9302, |
| "eval_samples_per_second": 682.537, |
| "eval_steps_per_second": 10.921, |
| "flow/cos_sim": 0.5408745482563972, |
| "flow/improvement_ratio": 0.9590303134173155, |
| "flow/mag_ratio_mean": 0.5143361240625381, |
| "flow/mag_ratio_std": 0.21537457825616002, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.09579145402197299, |
| "grad_norm": 0.19106586277484894, |
| "learning_rate": 0.000989307950724573, |
| "loss": 1.6214015483856201, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.09579145402197299, |
| "eval_cos_loss": 0.4567577252164483, |
| "eval_loss": 1.5844898335635662, |
| "eval_mse_loss": 1.356110967695713, |
| "flow/cos_sim": 0.5432424321770668, |
| "flow/improvement_ratio": 0.9650511220097542, |
| "flow/mag_ratio_mean": 0.5244949720799923, |
| "flow/mag_ratio_std": 0.21130397450178862, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.09579145402197299, |
| "eval_cos_loss": 0.4567577252164483, |
| "eval_loss": 1.5844898335635662, |
| "eval_mse_loss": 1.356110967695713, |
| "eval_runtime": 3.0508, |
| "eval_samples_per_second": 655.562, |
| "eval_steps_per_second": 10.489, |
| "flow/cos_sim": 0.5432424321770668, |
| "flow/improvement_ratio": 0.9650511220097542, |
| "flow/mag_ratio_mean": 0.5244949720799923, |
| "flow/mag_ratio_std": 0.21130397450178862, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.10643494891330332, |
| "grad_norm": 0.22245089709758759, |
| "learning_rate": 0.000985464388035817, |
| "loss": 1.6132733821868896, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.10643494891330332, |
| "eval_cos_loss": 0.4598613306879997, |
| "eval_loss": 1.589576181024313, |
| "eval_mse_loss": 1.359645515680313, |
| "flow/cos_sim": 0.5401386898010969, |
| "flow/improvement_ratio": 0.9610863700509071, |
| "flow/mag_ratio_mean": 0.5160716716200113, |
| "flow/mag_ratio_std": 0.21545762522146106, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.10643494891330332, |
| "eval_cos_loss": 0.4598613306879997, |
| "eval_loss": 1.589576181024313, |
| "eval_mse_loss": 1.359645515680313, |
| "eval_runtime": 3.1847, |
| "eval_samples_per_second": 627.993, |
| "eval_steps_per_second": 10.048, |
| "flow/cos_sim": 0.5401386898010969, |
| "flow/improvement_ratio": 0.9610863700509071, |
| "flow/mag_ratio_mean": 0.5160716716200113, |
| "flow/mag_ratio_std": 0.21545762522146106, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.11707844380463366, |
| "grad_norm": 0.1567550003528595, |
| "learning_rate": 0.0009810417042745768, |
| "loss": 1.6070518493652344, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.11707844380463366, |
| "eval_cos_loss": 0.4550258554518223, |
| "eval_loss": 1.577816877514124, |
| "eval_mse_loss": 1.350303951650858, |
| "flow/cos_sim": 0.5449741557240486, |
| "flow/improvement_ratio": 0.9648044053465128, |
| "flow/mag_ratio_mean": 0.5290831215679646, |
| "flow/mag_ratio_std": 0.21279342425987124, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.11707844380463366, |
| "eval_cos_loss": 0.4550258554518223, |
| "eval_loss": 1.577816877514124, |
| "eval_mse_loss": 1.350303951650858, |
| "eval_runtime": 2.6352, |
| "eval_samples_per_second": 758.945, |
| "eval_steps_per_second": 12.143, |
| "flow/cos_sim": 0.5449741557240486, |
| "flow/improvement_ratio": 0.9648044053465128, |
| "flow/mag_ratio_mean": 0.5290831215679646, |
| "flow/mag_ratio_std": 0.21279342425987124, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.127721938695964, |
| "grad_norm": 0.19316641986370087, |
| "learning_rate": 0.0009760451753569162, |
| "loss": 1.6028146743774414, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.127721938695964, |
| "eval_cos_loss": 0.45873888209462166, |
| "eval_loss": 1.5860362015664577, |
| "eval_mse_loss": 1.3566667586565018, |
| "flow/cos_sim": 0.5412612538784742, |
| "flow/improvement_ratio": 0.9610528890043497, |
| "flow/mag_ratio_mean": 0.5225661229342222, |
| "flow/mag_ratio_std": 0.2149493475444615, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.127721938695964, |
| "eval_cos_loss": 0.45873888209462166, |
| "eval_loss": 1.5860362015664577, |
| "eval_mse_loss": 1.3566667586565018, |
| "eval_runtime": 2.6091, |
| "eval_samples_per_second": 766.55, |
| "eval_steps_per_second": 12.265, |
| "flow/cos_sim": 0.5412612538784742, |
| "flow/improvement_ratio": 0.9610528890043497, |
| "flow/mag_ratio_mean": 0.5225661229342222, |
| "flow/mag_ratio_std": 0.2149493475444615, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.13836543358729433, |
| "grad_norm": 0.17066629230976105, |
| "learning_rate": 0.000970486470662755, |
| "loss": 1.5989067554473877, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.13836543358729433, |
| "eval_cos_loss": 0.4526587063446641, |
| "eval_loss": 1.5703520886600018, |
| "eval_mse_loss": 1.3440227322280407, |
| "flow/cos_sim": 0.547341376543045, |
| "flow/improvement_ratio": 0.9634687285870314, |
| "flow/mag_ratio_mean": 0.5251006819307804, |
| "flow/mag_ratio_std": 0.2169443154707551, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.13836543358729433, |
| "eval_cos_loss": 0.4526587063446641, |
| "eval_loss": 1.5703520886600018, |
| "eval_mse_loss": 1.3440227322280407, |
| "eval_runtime": 2.6502, |
| "eval_samples_per_second": 754.652, |
| "eval_steps_per_second": 12.074, |
| "flow/cos_sim": 0.547341376543045, |
| "flow/improvement_ratio": 0.9634687285870314, |
| "flow/mag_ratio_mean": 0.5251006819307804, |
| "flow/mag_ratio_std": 0.2169443154707551, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.14900892847862465, |
| "grad_norm": 0.19086262583732605, |
| "learning_rate": 0.0009643613549160033, |
| "loss": 1.5941526889801025, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.14900892847862465, |
| "eval_cos_loss": 0.45674111880362034, |
| "eval_loss": 1.5803881026804447, |
| "eval_mse_loss": 1.3520175516605377, |
| "flow/cos_sim": 0.5432589612901211, |
| "flow/improvement_ratio": 0.9569191709160805, |
| "flow/mag_ratio_mean": 0.5241195531561971, |
| "flow/mag_ratio_std": 0.2207528604194522, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.14900892847862465, |
| "eval_cos_loss": 0.45674111880362034, |
| "eval_loss": 1.5803881026804447, |
| "eval_mse_loss": 1.3520175516605377, |
| "eval_runtime": 2.84, |
| "eval_samples_per_second": 704.229, |
| "eval_steps_per_second": 11.268, |
| "flow/cos_sim": 0.5432589612901211, |
| "flow/improvement_ratio": 0.9569191709160805, |
| "flow/mag_ratio_mean": 0.5241195531561971, |
| "flow/mag_ratio_std": 0.2207528604194522, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.159652423369955, |
| "grad_norm": 0.20660291612148285, |
| "learning_rate": 0.0009576890825691249, |
| "loss": 1.5903245210647583, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.159652423369955, |
| "eval_cos_loss": 0.4470532648265362, |
| "eval_loss": 1.5533855073153973, |
| "eval_mse_loss": 1.329858873039484, |
| "flow/cos_sim": 0.5529466420412064, |
| "flow/improvement_ratio": 0.9680595081299543, |
| "flow/mag_ratio_mean": 0.5352848172187805, |
| "flow/mag_ratio_std": 0.22097993176430464, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.159652423369955, |
| "eval_cos_loss": 0.4470532648265362, |
| "eval_loss": 1.5533855073153973, |
| "eval_mse_loss": 1.329858873039484, |
| "eval_runtime": 2.6578, |
| "eval_samples_per_second": 752.494, |
| "eval_steps_per_second": 12.04, |
| "flow/cos_sim": 0.5529466420412064, |
| "flow/improvement_ratio": 0.9680595081299543, |
| "flow/mag_ratio_mean": 0.5352848172187805, |
| "flow/mag_ratio_std": 0.22097993176430464, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.17029591826128532, |
| "grad_norm": 0.23885692656040192, |
| "learning_rate": 0.0009504645698990064, |
| "loss": 1.589218020439148, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.17029591826128532, |
| "eval_cos_loss": 0.44447089545428753, |
| "eval_loss": 1.5484142042696476, |
| "eval_mse_loss": 1.3261787556111813, |
| "flow/cos_sim": 0.5555290877819061, |
| "flow/improvement_ratio": 0.9635521955788136, |
| "flow/mag_ratio_mean": 0.5299641713500023, |
| "flow/mag_ratio_std": 0.215805409476161, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.17029591826128532, |
| "eval_cos_loss": 0.44447089545428753, |
| "eval_loss": 1.5484142042696476, |
| "eval_mse_loss": 1.3261787556111813, |
| "eval_runtime": 2.6371, |
| "eval_samples_per_second": 758.402, |
| "eval_steps_per_second": 12.134, |
| "flow/cos_sim": 0.5555290877819061, |
| "flow/improvement_ratio": 0.9635521955788136, |
| "flow/mag_ratio_mean": 0.5299641713500023, |
| "flow/mag_ratio_std": 0.215805409476161, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.18093941315261566, |
| "grad_norm": 0.2062983363866806, |
| "learning_rate": 0.0009427105273394636, |
| "loss": 1.585401177406311, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.18093941315261566, |
| "eval_cos_loss": 0.45094432309269905, |
| "eval_loss": 1.5633347816765308, |
| "eval_mse_loss": 1.3378626182675362, |
| "flow/cos_sim": 0.5490557141602039, |
| "flow/improvement_ratio": 0.9632246606051922, |
| "flow/mag_ratio_mean": 0.5234426287934184, |
| "flow/mag_ratio_std": 0.22044725203886628, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.18093941315261566, |
| "eval_cos_loss": 0.45094432309269905, |
| "eval_loss": 1.5633347816765308, |
| "eval_mse_loss": 1.3378626182675362, |
| "eval_runtime": 2.5802, |
| "eval_samples_per_second": 775.125, |
| "eval_steps_per_second": 12.402, |
| "flow/cos_sim": 0.5490557141602039, |
| "flow/improvement_ratio": 0.9632246606051922, |
| "flow/mag_ratio_mean": 0.5234426287934184, |
| "flow/mag_ratio_std": 0.22044725203886628, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.19158290804394598, |
| "grad_norm": 0.15858766436576843, |
| "learning_rate": 0.0009344210469473947, |
| "loss": 1.5826770067214966, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.19158290804394598, |
| "eval_cos_loss": 0.44898632261902094, |
| "eval_loss": 1.5564597770571709, |
| "eval_mse_loss": 1.331966608762741, |
| "flow/cos_sim": 0.5510137844830751, |
| "flow/improvement_ratio": 0.9625816307961941, |
| "flow/mag_ratio_mean": 0.5298811597749591, |
| "flow/mag_ratio_std": 0.22253544814884663, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.19158290804394598, |
| "eval_cos_loss": 0.44898632261902094, |
| "eval_loss": 1.5564597770571709, |
| "eval_mse_loss": 1.331966608762741, |
| "eval_runtime": 2.5531, |
| "eval_samples_per_second": 783.347, |
| "eval_steps_per_second": 12.534, |
| "flow/cos_sim": 0.5510137844830751, |
| "flow/improvement_ratio": 0.9625816307961941, |
| "flow/mag_ratio_mean": 0.5298811597749591, |
| "flow/mag_ratio_std": 0.22253544814884663, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.20222640293527633, |
| "grad_norm": 0.2525703012943268, |
| "learning_rate": 0.0009256133361993658, |
| "loss": 1.5798900127410889, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.20222640293527633, |
| "eval_cos_loss": 0.45141084399074316, |
| "eval_loss": 1.567859135568142, |
| "eval_mse_loss": 1.3421537093818188, |
| "flow/cos_sim": 0.5485891196876764, |
| "flow/improvement_ratio": 0.96523248963058, |
| "flow/mag_ratio_mean": 0.5181732634082437, |
| "flow/mag_ratio_std": 0.22030179109424353, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.20222640293527633, |
| "eval_cos_loss": 0.45141084399074316, |
| "eval_loss": 1.567859135568142, |
| "eval_mse_loss": 1.3421537093818188, |
| "eval_runtime": 2.5625, |
| "eval_samples_per_second": 780.491, |
| "eval_steps_per_second": 12.488, |
| "flow/cos_sim": 0.5485891196876764, |
| "flow/improvement_ratio": 0.96523248963058, |
| "flow/mag_ratio_mean": 0.5181732634082437, |
| "flow/mag_ratio_std": 0.22030179109424353, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.21286989782660665, |
| "grad_norm": 0.18312996625900269, |
| "learning_rate": 0.0009163072432159066, |
| "loss": 1.579535961151123, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.21286989782660665, |
| "eval_cos_loss": 0.45327545143663883, |
| "eval_loss": 1.5679056644439697, |
| "eval_mse_loss": 1.3412679433822632, |
| "flow/cos_sim": 0.5467245355248451, |
| "flow/improvement_ratio": 0.9616729654371738, |
| "flow/mag_ratio_mean": 0.5273217614740133, |
| "flow/mag_ratio_std": 0.2235504975542426, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.21286989782660665, |
| "eval_cos_loss": 0.45327545143663883, |
| "eval_loss": 1.5679056644439697, |
| "eval_mse_loss": 1.3412679433822632, |
| "eval_runtime": 2.5697, |
| "eval_samples_per_second": 778.293, |
| "eval_steps_per_second": 12.453, |
| "flow/cos_sim": 0.5467245355248451, |
| "flow/improvement_ratio": 0.9616729654371738, |
| "flow/mag_ratio_mean": 0.5273217614740133, |
| "flow/mag_ratio_std": 0.2235504975542426, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.223513392717937, |
| "grad_norm": 0.21262691915035248, |
| "learning_rate": 0.0009064956775190607, |
| "loss": 1.577104926109314, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.223513392717937, |
| "eval_cos_loss": 0.4483450762927532, |
| "eval_loss": 1.553330171853304, |
| "eval_mse_loss": 1.329157643020153, |
| "flow/cos_sim": 0.5516549795866013, |
| "flow/improvement_ratio": 0.9627660047262907, |
| "flow/mag_ratio_mean": 0.5387043142691255, |
| "flow/mag_ratio_std": 0.2251730626448989, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.223513392717937, |
| "eval_cos_loss": 0.4483450762927532, |
| "eval_loss": 1.553330171853304, |
| "eval_mse_loss": 1.329157643020153, |
| "eval_runtime": 2.5071, |
| "eval_samples_per_second": 797.724, |
| "eval_steps_per_second": 12.764, |
| "flow/cos_sim": 0.5516549795866013, |
| "flow/improvement_ratio": 0.9627660047262907, |
| "flow/mag_ratio_mean": 0.5387043142691255, |
| "flow/mag_ratio_std": 0.2251730626448989, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.2341568876092673, |
| "grad_norm": 0.17988671362400055, |
| "learning_rate": 0.0008961991942494195, |
| "loss": 1.574266791343689, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.2341568876092673, |
| "eval_cos_loss": 0.44411917496472597, |
| "eval_loss": 1.543789055198431, |
| "eval_mse_loss": 1.3217294700443745, |
| "flow/cos_sim": 0.5558808352798223, |
| "flow/improvement_ratio": 0.9671246875077486, |
| "flow/mag_ratio_mean": 0.5348946927115321, |
| "flow/mag_ratio_std": 0.22368196118623018, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.2341568876092673, |
| "eval_cos_loss": 0.44411917496472597, |
| "eval_loss": 1.543789055198431, |
| "eval_mse_loss": 1.3217294700443745, |
| "eval_runtime": 2.5967, |
| "eval_samples_per_second": 770.215, |
| "eval_steps_per_second": 12.323, |
| "flow/cos_sim": 0.5558808352798223, |
| "flow/improvement_ratio": 0.9671246875077486, |
| "flow/mag_ratio_mean": 0.5348946927115321, |
| "flow/mag_ratio_std": 0.22368196118623018, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.24480038250059766, |
| "grad_norm": 0.22547593712806702, |
| "learning_rate": 0.0008854408194461756, |
| "loss": 1.5733323097229004, |
| "step": 23552 |
| }, |
| { |
| "epoch": 0.24480038250059766, |
| "eval_cos_loss": 0.44172694999724627, |
| "eval_loss": 1.5377833917737007, |
| "eval_mse_loss": 1.316919919103384, |
| "flow/cos_sim": 0.5582730043679476, |
| "flow/improvement_ratio": 0.9642701335251331, |
| "flow/mag_ratio_mean": 0.5346939843147993, |
| "flow/mag_ratio_std": 0.22327208751812577, |
| "step": 23552 |
| }, |
| { |
| "epoch": 0.24480038250059766, |
| "eval_cos_loss": 0.44172694999724627, |
| "eval_loss": 1.5377833917737007, |
| "eval_mse_loss": 1.316919919103384, |
| "eval_runtime": 3.1395, |
| "eval_samples_per_second": 637.04, |
| "eval_steps_per_second": 10.193, |
| "flow/cos_sim": 0.5582730043679476, |
| "flow/improvement_ratio": 0.9642701335251331, |
| "flow/mag_ratio_mean": 0.5346939843147993, |
| "flow/mag_ratio_std": 0.22327208751812577, |
| "step": 23552 |
| }, |
| { |
| "epoch": 0.255443877391928, |
| "grad_norm": 0.2300369143486023, |
| "learning_rate": 0.0008742123561119935, |
| "loss": 1.569944143295288, |
| "step": 24576 |
| }, |
| { |
| "epoch": 0.255443877391928, |
| "eval_cos_loss": 0.447942478582263, |
| "eval_loss": 1.553868442773819, |
| "eval_mse_loss": 1.3298972100019455, |
| "flow/cos_sim": 0.5520575055852532, |
| "flow/improvement_ratio": 0.9638102632015944, |
| "flow/mag_ratio_mean": 0.5306164929643273, |
| "flow/mag_ratio_std": 0.22182104969397187, |
| "step": 24576 |
| }, |
| { |
| "epoch": 0.255443877391928, |
| "eval_cos_loss": 0.447942478582263, |
| "eval_loss": 1.553868442773819, |
| "eval_mse_loss": 1.3298972100019455, |
| "eval_runtime": 2.5886, |
| "eval_samples_per_second": 772.612, |
| "eval_steps_per_second": 12.362, |
| "flow/cos_sim": 0.5520575055852532, |
| "flow/improvement_ratio": 0.9638102632015944, |
| "flow/mag_ratio_mean": 0.5306164929643273, |
| "flow/mag_ratio_std": 0.22182104969397187, |
| "step": 24576 |
| }, |
| { |
| "epoch": 0.26608737228325835, |
| "grad_norm": 0.2177908569574356, |
| "learning_rate": 0.0008625491011983832, |
| "loss": 1.5683772563934326, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.26608737228325835, |
| "eval_cos_loss": 0.45118876080960035, |
| "eval_loss": 1.5609249621629715, |
| "eval_mse_loss": 1.3353305757045746, |
| "flow/cos_sim": 0.5488111022859812, |
| "flow/improvement_ratio": 0.9652206618338823, |
| "flow/mag_ratio_mean": 0.5250881398096681, |
| "flow/mag_ratio_std": 0.22340481635183096, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.26608737228325835, |
| "eval_cos_loss": 0.45118876080960035, |
| "eval_loss": 1.5609249621629715, |
| "eval_mse_loss": 1.3353305757045746, |
| "eval_runtime": 2.5832, |
| "eval_samples_per_second": 774.241, |
| "eval_steps_per_second": 12.388, |
| "flow/cos_sim": 0.5488111022859812, |
| "flow/improvement_ratio": 0.9652206618338823, |
| "flow/mag_ratio_mean": 0.5250881398096681, |
| "flow/mag_ratio_std": 0.22340481635183096, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.27673086717458867, |
| "grad_norm": 0.13252000510692596, |
| "learning_rate": 0.0008504421682637403, |
| "loss": 1.5673582553863525, |
| "step": 26624 |
| }, |
| { |
| "epoch": 0.27673086717458867, |
| "eval_cos_loss": 0.44563145097345114, |
| "eval_loss": 1.5506689585745335, |
| "eval_mse_loss": 1.3278532326221466, |
| "flow/cos_sim": 0.5543686226010323, |
| "flow/improvement_ratio": 0.9666622839868069, |
| "flow/mag_ratio_mean": 0.5269411941990256, |
| "flow/mag_ratio_std": 0.21877468656748533, |
| "step": 26624 |
| }, |
| { |
| "epoch": 0.27673086717458867, |
| "eval_cos_loss": 0.44563145097345114, |
| "eval_loss": 1.5506689585745335, |
| "eval_mse_loss": 1.3278532326221466, |
| "eval_runtime": 2.617, |
| "eval_samples_per_second": 764.244, |
| "eval_steps_per_second": 12.228, |
| "flow/cos_sim": 0.5543686226010323, |
| "flow/improvement_ratio": 0.9666622839868069, |
| "flow/mag_ratio_mean": 0.5269411941990256, |
| "flow/mag_ratio_std": 0.21877468656748533, |
| "step": 26624 |
| }, |
| { |
| "epoch": 0.287374362065919, |
| "grad_norm": 0.2598721981048584, |
| "learning_rate": 0.0008379296157504366, |
| "loss": 1.564971923828125, |
| "step": 27648 |
| }, |
| { |
| "epoch": 0.287374362065919, |
| "eval_cos_loss": 0.4419550793245435, |
| "eval_loss": 1.5396056547760963, |
| "eval_mse_loss": 1.3186281062662601, |
| "flow/cos_sim": 0.5580449867993593, |
| "flow/improvement_ratio": 0.9673310127109289, |
| "flow/mag_ratio_mean": 0.5319117670878768, |
| "flow/mag_ratio_std": 0.22065124148502946, |
| "step": 27648 |
| }, |
| { |
| "epoch": 0.287374362065919, |
| "eval_cos_loss": 0.4419550793245435, |
| "eval_loss": 1.5396056547760963, |
| "eval_mse_loss": 1.3186281062662601, |
| "eval_runtime": 3.0156, |
| "eval_samples_per_second": 663.219, |
| "eval_steps_per_second": 10.612, |
| "flow/cos_sim": 0.5580449867993593, |
| "flow/improvement_ratio": 0.9673310127109289, |
| "flow/mag_ratio_mean": 0.5319117670878768, |
| "flow/mag_ratio_std": 0.22065124148502946, |
| "step": 27648 |
| }, |
| { |
| "epoch": 0.2980178569572493, |
| "grad_norm": 0.274239718914032, |
| "learning_rate": 0.0008250147265053921, |
| "loss": 1.564111590385437, |
| "step": 28672 |
| }, |
| { |
| "epoch": 0.2980178569572493, |
| "eval_cos_loss": 0.44205061066895723, |
| "eval_loss": 1.5401594452559948, |
| "eval_mse_loss": 1.319134145975113, |
| "flow/cos_sim": 0.5579493436962366, |
| "flow/improvement_ratio": 0.9671094436198473, |
| "flow/mag_ratio_mean": 0.5367719177156687, |
| "flow/mag_ratio_std": 0.2205441533587873, |
| "step": 28672 |
| }, |
| { |
| "epoch": 0.2980178569572493, |
| "eval_cos_loss": 0.44205061066895723, |
| "eval_loss": 1.5401594452559948, |
| "eval_mse_loss": 1.319134145975113, |
| "eval_runtime": 2.6525, |
| "eval_samples_per_second": 754.016, |
| "eval_steps_per_second": 12.064, |
| "flow/cos_sim": 0.5579493436962366, |
| "flow/improvement_ratio": 0.9671094436198473, |
| "flow/mag_ratio_mean": 0.5367719177156687, |
| "flow/mag_ratio_std": 0.2205441533587873, |
| "step": 28672 |
| }, |
| { |
| "epoch": 0.3086613518485797, |
| "grad_norm": 0.2095516473054886, |
| "learning_rate": 0.000811699689939724, |
| "loss": 1.5625946521759033, |
| "step": 29696 |
| }, |
| { |
| "epoch": 0.3086613518485797, |
| "eval_cos_loss": 0.44621053244918585, |
| "eval_loss": 1.5496392995119095, |
| "eval_mse_loss": 1.3265340402722359, |
| "flow/cos_sim": 0.5537894666194916, |
| "flow/improvement_ratio": 0.9614951889961958, |
| "flow/mag_ratio_mean": 0.5277672996744514, |
| "flow/mag_ratio_std": 0.22011788561940193, |
| "step": 29696 |
| }, |
| { |
| "epoch": 0.3086613518485797, |
| "eval_cos_loss": 0.44621053244918585, |
| "eval_loss": 1.5496392995119095, |
| "eval_mse_loss": 1.3265340402722359, |
| "eval_runtime": 2.5404, |
| "eval_samples_per_second": 787.269, |
| "eval_steps_per_second": 12.596, |
| "flow/cos_sim": 0.5537894666194916, |
| "flow/improvement_ratio": 0.9614951889961958, |
| "flow/mag_ratio_mean": 0.5277672996744514, |
| "flow/mag_ratio_std": 0.22011788561940193, |
| "step": 29696 |
| }, |
| { |
| "epoch": 0.31930484673991, |
| "grad_norm": 0.15808935463428497, |
| "learning_rate": 0.0007980128200054721, |
| "loss": 1.5646651983261108, |
| "step": 30720 |
| }, |
| { |
| "epoch": 0.31930484673991, |
| "eval_cos_loss": 0.44148214533925056, |
| "eval_loss": 1.5364415682852268, |
| "eval_mse_loss": 1.3157004974782467, |
| "flow/cos_sim": 0.5585179291665554, |
| "flow/improvement_ratio": 0.9648495689034462, |
| "flow/mag_ratio_mean": 0.537518884986639, |
| "flow/mag_ratio_std": 0.22350562876090407, |
| "step": 30720 |
| }, |
| { |
| "epoch": 0.31930484673991, |
| "eval_cos_loss": 0.44148214533925056, |
| "eval_loss": 1.5364415682852268, |
| "eval_mse_loss": 1.3157004974782467, |
| "eval_runtime": 2.5821, |
| "eval_samples_per_second": 774.564, |
| "eval_steps_per_second": 12.393, |
| "flow/cos_sim": 0.5585179291665554, |
| "flow/improvement_ratio": 0.9648495689034462, |
| "flow/mag_ratio_mean": 0.537518884986639, |
| "flow/mag_ratio_std": 0.22350562876090407, |
| "step": 30720 |
| }, |
| { |
| "epoch": 0.3299483416312403, |
| "grad_norm": 0.1921176314353943, |
| "learning_rate": 0.0007839843253324567, |
| "loss": 1.563474416732788, |
| "step": 31744 |
| }, |
| { |
| "epoch": 0.3299483416312403, |
| "eval_cos_loss": 0.4404078619554639, |
| "eval_loss": 1.534349039196968, |
| "eval_mse_loss": 1.3141451105475426, |
| "flow/cos_sim": 0.5595921669155359, |
| "flow/improvement_ratio": 0.9649890139698982, |
| "flow/mag_ratio_mean": 0.5285989735275507, |
| "flow/mag_ratio_std": 0.22230371600016952, |
| "step": 31744 |
| }, |
| { |
| "epoch": 0.3299483416312403, |
| "eval_cos_loss": 0.4404078619554639, |
| "eval_loss": 1.534349039196968, |
| "eval_mse_loss": 1.3141451105475426, |
| "eval_runtime": 2.5419, |
| "eval_samples_per_second": 786.808, |
| "eval_steps_per_second": 12.589, |
| "flow/cos_sim": 0.5595921669155359, |
| "flow/improvement_ratio": 0.9649890139698982, |
| "flow/mag_ratio_mean": 0.5285989735275507, |
| "flow/mag_ratio_std": 0.22230371600016952, |
| "step": 31744 |
| }, |
| { |
| "epoch": 0.34059183652257063, |
| "grad_norm": 0.19485324621200562, |
| "learning_rate": 0.0007696035173607825, |
| "loss": 1.5625982284545898, |
| "step": 32768 |
| }, |
| { |
| "epoch": 0.34059183652257063, |
| "eval_cos_loss": 0.4499282343313098, |
| "eval_loss": 1.5567349456250668, |
| "eval_mse_loss": 1.3317708261311054, |
| "flow/cos_sim": 0.5500718057155609, |
| "flow/improvement_ratio": 0.9645203202962875, |
| "flow/mag_ratio_mean": 0.5316947773098946, |
| "flow/mag_ratio_std": 0.22561145247891545, |
| "step": 32768 |
| }, |
| { |
| "epoch": 0.34059183652257063, |
| "eval_cos_loss": 0.4499282343313098, |
| "eval_loss": 1.5567349456250668, |
| "eval_mse_loss": 1.3317708261311054, |
| "eval_runtime": 2.5397, |
| "eval_samples_per_second": 787.504, |
| "eval_steps_per_second": 12.6, |
| "flow/cos_sim": 0.5500718057155609, |
| "flow/improvement_ratio": 0.9645203202962875, |
| "flow/mag_ratio_mean": 0.5316947773098946, |
| "flow/mag_ratio_std": 0.22561145247891545, |
| "step": 32768 |
| }, |
| { |
| "epoch": 0.351235331413901, |
| "grad_norm": 0.18854600191116333, |
| "learning_rate": 0.0007549156025151, |
| "loss": 1.5604270696640015, |
| "step": 33792 |
| }, |
| { |
| "epoch": 0.351235331413901, |
| "eval_cos_loss": 0.44613189715892076, |
| "eval_loss": 1.5455855540931225, |
| "eval_mse_loss": 1.322519600391388, |
| "flow/cos_sim": 0.5538681279867887, |
| "flow/improvement_ratio": 0.9590773209929466, |
| "flow/mag_ratio_mean": 0.5333189619705081, |
| "flow/mag_ratio_std": 0.22768286149948835, |
| "step": 33792 |
| }, |
| { |
| "epoch": 0.351235331413901, |
| "eval_cos_loss": 0.44613189715892076, |
| "eval_loss": 1.5455855540931225, |
| "eval_mse_loss": 1.322519600391388, |
| "eval_runtime": 2.5511, |
| "eval_samples_per_second": 783.987, |
| "eval_steps_per_second": 12.544, |
| "flow/cos_sim": 0.5538681279867887, |
| "flow/improvement_ratio": 0.9590773209929466, |
| "flow/mag_ratio_mean": 0.5333189619705081, |
| "flow/mag_ratio_std": 0.22768286149948835, |
| "step": 33792 |
| }, |
| { |
| "epoch": 0.3618788263052313, |
| "grad_norm": 0.1909618228673935, |
| "learning_rate": 0.0007399093898115421, |
| "loss": 1.559531807899475, |
| "step": 34816 |
| }, |
| { |
| "epoch": 0.3618788263052313, |
| "eval_cos_loss": 0.4420803328976035, |
| "eval_loss": 1.5389960557222366, |
| "eval_mse_loss": 1.3179558925330639, |
| "flow/cos_sim": 0.5579197406768799, |
| "flow/improvement_ratio": 0.959646550938487, |
| "flow/mag_ratio_mean": 0.5326429791748524, |
| "flow/mag_ratio_std": 0.22111017350107431, |
| "step": 34816 |
| }, |
| { |
| "epoch": 0.3618788263052313, |
| "eval_cos_loss": 0.4420803328976035, |
| "eval_loss": 1.5389960557222366, |
| "eval_mse_loss": 1.3179558925330639, |
| "eval_runtime": 2.5395, |
| "eval_samples_per_second": 787.558, |
| "eval_steps_per_second": 12.601, |
| "flow/cos_sim": 0.5579197406768799, |
| "flow/improvement_ratio": 0.959646550938487, |
| "flow/mag_ratio_mean": 0.5326429791748524, |
| "flow/mag_ratio_std": 0.22111017350107431, |
| "step": 34816 |
| }, |
| { |
| "epoch": 0.37252232119656165, |
| "grad_norm": 0.1629696786403656, |
| "learning_rate": 0.0007246320516499633, |
| "loss": 1.5580956935882568, |
| "step": 35840 |
| }, |
| { |
| "epoch": 0.37252232119656165, |
| "eval_cos_loss": 0.44205798115581274, |
| "eval_loss": 1.5348509810864925, |
| "eval_mse_loss": 1.3138219900429249, |
| "flow/cos_sim": 0.5579419694840908, |
| "flow/improvement_ratio": 0.9621348176151514, |
| "flow/mag_ratio_mean": 0.5318824276328087, |
| "flow/mag_ratio_std": 0.226469362154603, |
| "step": 35840 |
| }, |
| { |
| "epoch": 0.37252232119656165, |
| "eval_cos_loss": 0.44205798115581274, |
| "eval_loss": 1.5348509810864925, |
| "eval_mse_loss": 1.3138219900429249, |
| "eval_runtime": 2.557, |
| "eval_samples_per_second": 782.177, |
| "eval_steps_per_second": 12.515, |
| "flow/cos_sim": 0.5579419694840908, |
| "flow/improvement_ratio": 0.9621348176151514, |
| "flow/mag_ratio_mean": 0.5318824276328087, |
| "flow/mag_ratio_std": 0.226469362154603, |
| "step": 35840 |
| }, |
| { |
| "epoch": 0.38316581608789196, |
| "grad_norm": 0.2269536405801773, |
| "learning_rate": 0.0007090719479543767, |
| "loss": 1.557045578956604, |
| "step": 36864 |
| }, |
| { |
| "epoch": 0.38316581608789196, |
| "eval_cos_loss": 0.4411419341340661, |
| "eval_loss": 1.5364714972674847, |
| "eval_mse_loss": 1.315900530666113, |
| "flow/cos_sim": 0.5588581711053848, |
| "flow/improvement_ratio": 0.965403363108635, |
| "flow/mag_ratio_mean": 0.5407936815172434, |
| "flow/mag_ratio_std": 0.22326642088592052, |
| "step": 36864 |
| }, |
| { |
| "epoch": 0.38316581608789196, |
| "eval_cos_loss": 0.4411419341340661, |
| "eval_loss": 1.5364714972674847, |
| "eval_mse_loss": 1.315900530666113, |
| "eval_runtime": 2.5319, |
| "eval_samples_per_second": 789.932, |
| "eval_steps_per_second": 12.639, |
| "flow/cos_sim": 0.5588581711053848, |
| "flow/improvement_ratio": 0.965403363108635, |
| "flow/mag_ratio_mean": 0.5407936815172434, |
| "flow/mag_ratio_std": 0.22326642088592052, |
| "step": 36864 |
| }, |
| { |
| "epoch": 0.39380931097922234, |
| "grad_norm": 0.18450024724006653, |
| "learning_rate": 0.0006932779922946351, |
| "loss": 1.5578693151474, |
| "step": 37888 |
| }, |
| { |
| "epoch": 0.39380931097922234, |
| "eval_cos_loss": 0.44131703954190016, |
| "eval_loss": 1.5340029932558537, |
| "eval_mse_loss": 1.3133444860577583, |
| "flow/cos_sim": 0.5586829409003258, |
| "flow/improvement_ratio": 0.9628860391676426, |
| "flow/mag_ratio_mean": 0.537332147359848, |
| "flow/mag_ratio_std": 0.22678001504391432, |
| "step": 37888 |
| }, |
| { |
| "epoch": 0.39380931097922234, |
| "eval_cos_loss": 0.44131703954190016, |
| "eval_loss": 1.5340029932558537, |
| "eval_mse_loss": 1.3133444860577583, |
| "eval_runtime": 2.555, |
| "eval_samples_per_second": 782.783, |
| "eval_steps_per_second": 12.525, |
| "flow/cos_sim": 0.5586829409003258, |
| "flow/improvement_ratio": 0.9628860391676426, |
| "flow/mag_ratio_mean": 0.537332147359848, |
| "flow/mag_ratio_std": 0.22678001504391432, |
| "step": 37888 |
| }, |
| { |
| "epoch": 0.40445280587055266, |
| "grad_norm": 0.20654521882534027, |
| "learning_rate": 0.0006772381509746807, |
| "loss": 1.5568833351135254, |
| "step": 38912 |
| }, |
| { |
| "epoch": 0.40445280587055266, |
| "eval_cos_loss": 0.4440508605912328, |
| "eval_loss": 1.540926594287157, |
| "eval_mse_loss": 1.3189011700451374, |
| "flow/cos_sim": 0.5559491030871868, |
| "flow/improvement_ratio": 0.964597575366497, |
| "flow/mag_ratio_mean": 0.5335862170904875, |
| "flow/mag_ratio_std": 0.22821834543719888, |
| "step": 38912 |
| }, |
| { |
| "epoch": 0.40445280587055266, |
| "eval_cos_loss": 0.4440508605912328, |
| "eval_loss": 1.540926594287157, |
| "eval_mse_loss": 1.3189011700451374, |
| "eval_runtime": 2.9265, |
| "eval_samples_per_second": 683.411, |
| "eval_steps_per_second": 10.935, |
| "flow/cos_sim": 0.5559491030871868, |
| "flow/improvement_ratio": 0.964597575366497, |
| "flow/mag_ratio_mean": 0.5335862170904875, |
| "flow/mag_ratio_std": 0.22821834543719888, |
| "step": 38912 |
| }, |
| { |
| "epoch": 0.415096300761883, |
| "grad_norm": 0.20580987632274628, |
| "learning_rate": 0.0006609868783930164, |
| "loss": 1.5565650463104248, |
| "step": 39936 |
| }, |
| { |
| "epoch": 0.415096300761883, |
| "eval_cos_loss": 0.4446534486487508, |
| "eval_loss": 1.5423276983201504, |
| "eval_mse_loss": 1.3200009688735008, |
| "flow/cos_sim": 0.5553465932607651, |
| "flow/improvement_ratio": 0.9601697400212288, |
| "flow/mag_ratio_mean": 0.5308061949908733, |
| "flow/mag_ratio_std": 0.22724535362794995, |
| "step": 39936 |
| }, |
| { |
| "epoch": 0.415096300761883, |
| "eval_cos_loss": 0.4446534486487508, |
| "eval_loss": 1.5423276983201504, |
| "eval_mse_loss": 1.3200009688735008, |
| "eval_runtime": 2.5315, |
| "eval_samples_per_second": 790.058, |
| "eval_steps_per_second": 12.641, |
| "flow/cos_sim": 0.5553465932607651, |
| "flow/improvement_ratio": 0.9601697400212288, |
| "flow/mag_ratio_mean": 0.5308061949908733, |
| "flow/mag_ratio_std": 0.22724535362794995, |
| "step": 39936 |
| }, |
| { |
| "epoch": 0.4257397956532133, |
| "grad_norm": 0.17006264626979828, |
| "learning_rate": 0.0006445597062966236, |
| "loss": 1.5565887689590454, |
| "step": 40960 |
| }, |
| { |
| "epoch": 0.4257397956532133, |
| "eval_cos_loss": 0.4380533881485462, |
| "eval_loss": 1.5283529199659824, |
| "eval_mse_loss": 1.3093262203037739, |
| "flow/cos_sim": 0.5619466044008732, |
| "flow/improvement_ratio": 0.9661596808582544, |
| "flow/mag_ratio_mean": 0.5353248585015535, |
| "flow/mag_ratio_std": 0.22140436619520187, |
| "step": 40960 |
| }, |
| { |
| "epoch": 0.4257397956532133, |
| "eval_cos_loss": 0.4380533881485462, |
| "eval_loss": 1.5283529199659824, |
| "eval_mse_loss": 1.3093262203037739, |
| "eval_runtime": 2.6667, |
| "eval_samples_per_second": 749.988, |
| "eval_steps_per_second": 12.0, |
| "flow/cos_sim": 0.5619466044008732, |
| "flow/improvement_ratio": 0.9661596808582544, |
| "flow/mag_ratio_mean": 0.5353248585015535, |
| "flow/mag_ratio_std": 0.22140436619520187, |
| "step": 40960 |
| }, |
| { |
| "epoch": 0.43638329054454367, |
| "grad_norm": 0.2264794260263443, |
| "learning_rate": 0.0006279604223844502, |
| "loss": 1.5556617975234985, |
| "step": 41984 |
| }, |
| { |
| "epoch": 0.43638329054454367, |
| "eval_cos_loss": 0.4411089513450861, |
| "eval_loss": 1.5351563543081284, |
| "eval_mse_loss": 1.3146018758416176, |
| "flow/cos_sim": 0.5588910467922688, |
| "flow/improvement_ratio": 0.9700996112078428, |
| "flow/mag_ratio_mean": 0.535472328774631, |
| "flow/mag_ratio_std": 0.2249652906320989, |
| "step": 41984 |
| }, |
| { |
| "epoch": 0.43638329054454367, |
| "eval_cos_loss": 0.4411089513450861, |
| "eval_loss": 1.5351563543081284, |
| "eval_mse_loss": 1.3146018758416176, |
| "eval_runtime": 2.6645, |
| "eval_samples_per_second": 750.607, |
| "eval_steps_per_second": 12.01, |
| "flow/cos_sim": 0.5588910467922688, |
| "flow/improvement_ratio": 0.9700996112078428, |
| "flow/mag_ratio_mean": 0.535472328774631, |
| "flow/mag_ratio_std": 0.2249652906320989, |
| "step": 41984 |
| }, |
| { |
| "epoch": 0.447026785435874, |
| "grad_norm": 0.27407148480415344, |
| "learning_rate": 0.0006111923466049098, |
| "loss": 1.5525274276733398, |
| "step": 43008 |
| }, |
| { |
| "epoch": 0.447026785435874, |
| "eval_cos_loss": 0.43889701180160046, |
| "eval_loss": 1.5268253944814205, |
| "eval_mse_loss": 1.3073768950998783, |
| "flow/cos_sim": 0.561102925799787, |
| "flow/improvement_ratio": 0.9647987205535173, |
| "flow/mag_ratio_mean": 0.5332341426983476, |
| "flow/mag_ratio_std": 0.22829985432326794, |
| "step": 43008 |
| }, |
| { |
| "epoch": 0.447026785435874, |
| "eval_cos_loss": 0.43889701180160046, |
| "eval_loss": 1.5268253944814205, |
| "eval_mse_loss": 1.3073768950998783, |
| "eval_runtime": 2.7939, |
| "eval_samples_per_second": 715.846, |
| "eval_steps_per_second": 11.454, |
| "flow/cos_sim": 0.561102925799787, |
| "flow/improvement_ratio": 0.9647987205535173, |
| "flow/mag_ratio_mean": 0.5332341426983476, |
| "flow/mag_ratio_std": 0.22829985432326794, |
| "step": 43008 |
| }, |
| { |
| "epoch": 0.4576702803272043, |
| "grad_norm": 0.1399686485528946, |
| "learning_rate": 0.0005942916270463306, |
| "loss": 1.5545454025268555, |
| "step": 44032 |
| }, |
| { |
| "epoch": 0.4576702803272043, |
| "eval_cos_loss": 0.44924431946128607, |
| "eval_loss": 1.553151711821556, |
| "eval_mse_loss": 1.3285295516252518, |
| "flow/cos_sim": 0.550755743868649, |
| "flow/improvement_ratio": 0.9646315854042768, |
| "flow/mag_ratio_mean": 0.5285015730187297, |
| "flow/mag_ratio_std": 0.22717531491070986, |
| "step": 44032 |
| }, |
| { |
| "epoch": 0.4576702803272043, |
| "eval_cos_loss": 0.44924431946128607, |
| "eval_loss": 1.553151711821556, |
| "eval_mse_loss": 1.3285295516252518, |
| "eval_runtime": 2.6015, |
| "eval_samples_per_second": 768.789, |
| "eval_steps_per_second": 12.301, |
| "flow/cos_sim": 0.550755743868649, |
| "flow/improvement_ratio": 0.9646315854042768, |
| "flow/mag_ratio_mean": 0.5285015730187297, |
| "flow/mag_ratio_std": 0.22717531491070986, |
| "step": 44032 |
| }, |
| { |
| "epoch": 0.4683137752185346, |
| "grad_norm": 0.17331954836845398, |
| "learning_rate": 0.0005772784249462365, |
| "loss": 1.5531715154647827, |
| "step": 45056 |
| }, |
| { |
| "epoch": 0.4683137752185346, |
| "eval_cos_loss": 0.44831305276602507, |
| "eval_loss": 1.55314514413476, |
| "eval_mse_loss": 1.328988615423441, |
| "flow/cos_sim": 0.5516869705170393, |
| "flow/improvement_ratio": 0.9640028644353151, |
| "flow/mag_ratio_mean": 0.53109060972929, |
| "flow/mag_ratio_std": 0.22524388320744038, |
| "step": 45056 |
| }, |
| { |
| "epoch": 0.4683137752185346, |
| "eval_cos_loss": 0.44831305276602507, |
| "eval_loss": 1.55314514413476, |
| "eval_mse_loss": 1.328988615423441, |
| "eval_runtime": 2.6383, |
| "eval_samples_per_second": 758.058, |
| "eval_steps_per_second": 12.129, |
| "flow/cos_sim": 0.5516869705170393, |
| "flow/improvement_ratio": 0.9640028644353151, |
| "flow/mag_ratio_mean": 0.53109060972929, |
| "flow/mag_ratio_std": 0.22524388320744038, |
| "step": 45056 |
| }, |
| { |
| "epoch": 0.478957270109865, |
| "grad_norm": 0.20901691913604736, |
| "learning_rate": 0.0005601730357250316, |
| "loss": 1.5534908771514893, |
| "step": 46080 |
| }, |
| { |
| "epoch": 0.478957270109865, |
| "eval_cos_loss": 0.4380967328324914, |
| "eval_loss": 1.5263510905206203, |
| "eval_mse_loss": 1.3073027282953262, |
| "flow/cos_sim": 0.5619033649563789, |
| "flow/improvement_ratio": 0.9646317362785339, |
| "flow/mag_ratio_mean": 0.5367344031110406, |
| "flow/mag_ratio_std": 0.22356789046898484, |
| "step": 46080 |
| }, |
| { |
| "epoch": 0.478957270109865, |
| "eval_cos_loss": 0.4380967328324914, |
| "eval_loss": 1.5263510905206203, |
| "eval_mse_loss": 1.3073027282953262, |
| "eval_runtime": 3.1077, |
| "eval_samples_per_second": 643.555, |
| "eval_steps_per_second": 10.297, |
| "flow/cos_sim": 0.5619033649563789, |
| "flow/improvement_ratio": 0.9646317362785339, |
| "flow/mag_ratio_mean": 0.5367344031110406, |
| "flow/mag_ratio_std": 0.22356789046898484, |
| "step": 46080 |
| }, |
| { |
| "epoch": 0.4896007650011953, |
| "grad_norm": 0.18614411354064941, |
| "learning_rate": 0.0005430126677168879, |
| "loss": 1.5517550706863403, |
| "step": 47104 |
| }, |
| { |
| "epoch": 0.4896007650011953, |
| "eval_cos_loss": 0.4381159236654639, |
| "eval_loss": 1.5279735252261162, |
| "eval_mse_loss": 1.308915562927723, |
| "flow/cos_sim": 0.5618840865790844, |
| "flow/improvement_ratio": 0.9676383044570684, |
| "flow/mag_ratio_mean": 0.5347359916195273, |
| "flow/mag_ratio_std": 0.22025129199028015, |
| "step": 47104 |
| }, |
| { |
| "epoch": 0.4896007650011953, |
| "eval_cos_loss": 0.4381159236654639, |
| "eval_loss": 1.5279735252261162, |
| "eval_mse_loss": 1.308915562927723, |
| "eval_runtime": 2.5963, |
| "eval_samples_per_second": 770.338, |
| "eval_steps_per_second": 12.325, |
| "flow/cos_sim": 0.5618840865790844, |
| "flow/improvement_ratio": 0.9676383044570684, |
| "flow/mag_ratio_mean": 0.5347359916195273, |
| "flow/mag_ratio_std": 0.22025129199028015, |
| "step": 47104 |
| }, |
| { |
| "epoch": 0.5002442598925256, |
| "grad_norm": 0.20864352583885193, |
| "learning_rate": 0.0005257842461318475, |
| "loss": 1.5500738620758057, |
| "step": 48128 |
| }, |
| { |
| "epoch": 0.5002442598925256, |
| "eval_cos_loss": 0.4427802488207817, |
| "eval_loss": 1.5378683991730213, |
| "eval_mse_loss": 1.3164782784879208, |
| "flow/cos_sim": 0.5572197437286377, |
| "flow/improvement_ratio": 0.9660468604415655, |
| "flow/mag_ratio_mean": 0.5358876623213291, |
| "flow/mag_ratio_std": 0.22517190361395478, |
| "step": 48128 |
| }, |
| { |
| "epoch": 0.5002442598925256, |
| "eval_cos_loss": 0.4427802488207817, |
| "eval_loss": 1.5378683991730213, |
| "eval_mse_loss": 1.3164782784879208, |
| "eval_runtime": 2.7741, |
| "eval_samples_per_second": 720.965, |
| "eval_steps_per_second": 11.535, |
| "flow/cos_sim": 0.5572197437286377, |
| "flow/improvement_ratio": 0.9660468604415655, |
| "flow/mag_ratio_mean": 0.5358876623213291, |
| "flow/mag_ratio_std": 0.22517190361395478, |
| "step": 48128 |
| }, |
| { |
| "epoch": 0.510887754783856, |
| "grad_norm": 0.18199937045574188, |
| "learning_rate": 0.0005085250659563913, |
| "loss": 1.5504491329193115, |
| "step": 49152 |
| }, |
| { |
| "epoch": 0.510887754783856, |
| "eval_cos_loss": 0.4387433025985956, |
| "eval_loss": 1.5289665646851063, |
| "eval_mse_loss": 1.309594914317131, |
| "flow/cos_sim": 0.561256805434823, |
| "flow/improvement_ratio": 0.9640381913632154, |
| "flow/mag_ratio_mean": 0.5361321400851011, |
| "flow/mag_ratio_std": 0.22544911736622453, |
| "step": 49152 |
| }, |
| { |
| "epoch": 0.510887754783856, |
| "eval_cos_loss": 0.4387433025985956, |
| "eval_loss": 1.5289665646851063, |
| "eval_mse_loss": 1.309594914317131, |
| "eval_runtime": 2.5843, |
| "eval_samples_per_second": 773.896, |
| "eval_steps_per_second": 12.382, |
| "flow/cos_sim": 0.561256805434823, |
| "flow/improvement_ratio": 0.9640381913632154, |
| "flow/mag_ratio_mean": 0.5361321400851011, |
| "flow/mag_ratio_std": 0.22544911736622453, |
| "step": 49152 |
| }, |
| { |
| "epoch": 0.5215312496751863, |
| "grad_norm": 0.21871572732925415, |
| "learning_rate": 0.0004912557160435426, |
| "loss": 1.551537036895752, |
| "step": 50176 |
| }, |
| { |
| "epoch": 0.5215312496751863, |
| "eval_cos_loss": 0.4424938661977649, |
| "eval_loss": 1.5380571633577347, |
| "eval_mse_loss": 1.316810242831707, |
| "flow/cos_sim": 0.5575060974806547, |
| "flow/improvement_ratio": 0.9636888317763805, |
| "flow/mag_ratio_mean": 0.5369405504316092, |
| "flow/mag_ratio_std": 0.22526462702080607, |
| "step": 50176 |
| }, |
| { |
| "epoch": 0.5215312496751863, |
| "eval_cos_loss": 0.4424938661977649, |
| "eval_loss": 1.5380571633577347, |
| "eval_mse_loss": 1.316810242831707, |
| "eval_runtime": 2.6421, |
| "eval_samples_per_second": 756.983, |
| "eval_steps_per_second": 12.112, |
| "flow/cos_sim": 0.5575060974806547, |
| "flow/improvement_ratio": 0.9636888317763805, |
| "flow/mag_ratio_mean": 0.5369405504316092, |
| "flow/mag_ratio_std": 0.22526462702080607, |
| "step": 50176 |
| }, |
| { |
| "epoch": 0.5321747445665167, |
| "grad_norm": 0.19360916316509247, |
| "learning_rate": 0.000474013640007982, |
| "loss": 1.550221562385559, |
| "step": 51200 |
| }, |
| { |
| "epoch": 0.5321747445665167, |
| "eval_cos_loss": 0.44469246733933687, |
| "eval_loss": 1.5433855392038822, |
| "eval_mse_loss": 1.3210392966866493, |
| "flow/cos_sim": 0.5553075838834047, |
| "flow/improvement_ratio": 0.9643128626048565, |
| "flow/mag_ratio_mean": 0.5328638143837452, |
| "flow/mag_ratio_std": 0.2267028819769621, |
| "step": 51200 |
| }, |
| { |
| "epoch": 0.5321747445665167, |
| "eval_cos_loss": 0.44469246733933687, |
| "eval_loss": 1.5433855392038822, |
| "eval_mse_loss": 1.3210392966866493, |
| "eval_runtime": 2.5901, |
| "eval_samples_per_second": 772.182, |
| "eval_steps_per_second": 12.355, |
| "flow/cos_sim": 0.5553075838834047, |
| "flow/improvement_ratio": 0.9643128626048565, |
| "flow/mag_ratio_mean": 0.5328638143837452, |
| "flow/mag_ratio_std": 0.2267028819769621, |
| "step": 51200 |
| }, |
| { |
| "epoch": 0.542818239457847, |
| "grad_norm": 0.25140267610549927, |
| "learning_rate": 0.0004567857008049507, |
| "loss": 1.5501980781555176, |
| "step": 52224 |
| }, |
| { |
| "epoch": 0.542818239457847, |
| "eval_cos_loss": 0.44003486074507236, |
| "eval_loss": 1.5340061485767365, |
| "eval_mse_loss": 1.3139887191355228, |
| "flow/cos_sim": 0.5599651224911213, |
| "flow/improvement_ratio": 0.9662698730826378, |
| "flow/mag_ratio_mean": 0.5311327101662755, |
| "flow/mag_ratio_std": 0.21956392657011747, |
| "step": 52224 |
| }, |
| { |
| "epoch": 0.542818239457847, |
| "eval_cos_loss": 0.44003486074507236, |
| "eval_loss": 1.5340061485767365, |
| "eval_mse_loss": 1.3139887191355228, |
| "eval_runtime": 2.5514, |
| "eval_samples_per_second": 783.875, |
| "eval_steps_per_second": 12.542, |
| "flow/cos_sim": 0.5599651224911213, |
| "flow/improvement_ratio": 0.9662698730826378, |
| "flow/mag_ratio_mean": 0.5311327101662755, |
| "flow/mag_ratio_std": 0.21956392657011747, |
| "step": 52224 |
| }, |
| { |
| "epoch": 0.5534617343491773, |
| "grad_norm": 0.1874593198299408, |
| "learning_rate": 0.0004396260548863663, |
| "loss": 1.5494704246520996, |
| "step": 53248 |
| }, |
| { |
| "epoch": 0.5534617343491773, |
| "eval_cos_loss": 0.4375583464279771, |
| "eval_loss": 1.5273119732737541, |
| "eval_mse_loss": 1.3085327930748463, |
| "flow/cos_sim": 0.5624416321516037, |
| "flow/improvement_ratio": 0.9654307011514902, |
| "flow/mag_ratio_mean": 0.5390398278832436, |
| "flow/mag_ratio_std": 0.2204155451618135, |
| "step": 53248 |
| }, |
| { |
| "epoch": 0.5534617343491773, |
| "eval_cos_loss": 0.4375583464279771, |
| "eval_loss": 1.5273119732737541, |
| "eval_mse_loss": 1.3085327930748463, |
| "eval_runtime": 2.9751, |
| "eval_samples_per_second": 672.236, |
| "eval_steps_per_second": 10.756, |
| "flow/cos_sim": 0.5624416321516037, |
| "flow/improvement_ratio": 0.9654307011514902, |
| "flow/mag_ratio_mean": 0.5390398278832436, |
| "flow/mag_ratio_std": 0.2204155451618135, |
| "step": 53248 |
| }, |
| { |
| "epoch": 0.5641052292405077, |
| "grad_norm": 0.260960191488266, |
| "learning_rate": 0.000422521628012444, |
| "loss": 1.5488193035125732, |
| "step": 54272 |
| }, |
| { |
| "epoch": 0.5641052292405077, |
| "eval_cos_loss": 0.4423528155311942, |
| "eval_loss": 1.5401365533471107, |
| "eval_mse_loss": 1.318960152566433, |
| "flow/cos_sim": 0.5576471537351608, |
| "flow/improvement_ratio": 0.9645018931478262, |
| "flow/mag_ratio_mean": 0.5321623589843512, |
| "flow/mag_ratio_std": 0.22070467984303832, |
| "step": 54272 |
| }, |
| { |
| "epoch": 0.5641052292405077, |
| "eval_cos_loss": 0.4423528155311942, |
| "eval_loss": 1.5401365533471107, |
| "eval_mse_loss": 1.318960152566433, |
| "eval_runtime": 2.7006, |
| "eval_samples_per_second": 740.582, |
| "eval_steps_per_second": 11.849, |
| "flow/cos_sim": 0.5576471537351608, |
| "flow/improvement_ratio": 0.9645018931478262, |
| "flow/mag_ratio_mean": 0.5321623589843512, |
| "flow/mag_ratio_std": 0.22070467984303832, |
| "step": 54272 |
| }, |
| { |
| "epoch": 0.574748724131838, |
| "grad_norm": 0.18275974690914154, |
| "learning_rate": 0.00040552618837104806, |
| "loss": 1.5481247901916504, |
| "step": 55296 |
| }, |
| { |
| "epoch": 0.574748724131838, |
| "eval_cos_loss": 0.4375903755426407, |
| "eval_loss": 1.52578229829669, |
| "eval_mse_loss": 1.3069871068000793, |
| "flow/cos_sim": 0.5624096170067787, |
| "flow/improvement_ratio": 0.9644604399800301, |
| "flow/mag_ratio_mean": 0.5396961104124784, |
| "flow/mag_ratio_std": 0.22439152654260397, |
| "step": 55296 |
| }, |
| { |
| "epoch": 0.574748724131838, |
| "eval_cos_loss": 0.4375903755426407, |
| "eval_loss": 1.52578229829669, |
| "eval_mse_loss": 1.3069871068000793, |
| "eval_runtime": 2.5932, |
| "eval_samples_per_second": 771.259, |
| "eval_steps_per_second": 12.34, |
| "flow/cos_sim": 0.5624096170067787, |
| "flow/improvement_ratio": 0.9644604399800301, |
| "flow/mag_ratio_mean": 0.5396961104124784, |
| "flow/mag_ratio_std": 0.22439152654260397, |
| "step": 55296 |
| }, |
| { |
| "epoch": 0.5853922190231683, |
| "grad_norm": 0.19556212425231934, |
| "learning_rate": 0.00038862678683408014, |
| "loss": 1.548845648765564, |
| "step": 56320 |
| }, |
| { |
| "epoch": 0.5853922190231683, |
| "eval_cos_loss": 0.44385355431586504, |
| "eval_loss": 1.5408159419894218, |
| "eval_mse_loss": 1.3188891597092152, |
| "flow/cos_sim": 0.5561465304344893, |
| "flow/improvement_ratio": 0.9655895195901394, |
| "flow/mag_ratio_mean": 0.5379520216956735, |
| "flow/mag_ratio_std": 0.22702415706589818, |
| "step": 56320 |
| }, |
| { |
| "epoch": 0.5853922190231683, |
| "eval_cos_loss": 0.44385355431586504, |
| "eval_loss": 1.5408159419894218, |
| "eval_mse_loss": 1.3188891597092152, |
| "eval_runtime": 2.811, |
| "eval_samples_per_second": 711.502, |
| "eval_steps_per_second": 11.384, |
| "flow/cos_sim": 0.5561465304344893, |
| "flow/improvement_ratio": 0.9655895195901394, |
| "flow/mag_ratio_mean": 0.5379520216956735, |
| "flow/mag_ratio_std": 0.22702415706589818, |
| "step": 56320 |
| }, |
| { |
| "epoch": 0.5960357139144986, |
| "grad_norm": 0.154473677277565, |
| "learning_rate": 0.00037187654708719937, |
| "loss": 1.5476142168045044, |
| "step": 57344 |
| }, |
| { |
| "epoch": 0.5960357139144986, |
| "eval_cos_loss": 0.44051590468734503, |
| "eval_loss": 1.5316696986556053, |
| "eval_mse_loss": 1.3114117458462715, |
| "flow/cos_sim": 0.5594841903075576, |
| "flow/improvement_ratio": 0.9659801628440619, |
| "flow/mag_ratio_mean": 0.5374981416389346, |
| "flow/mag_ratio_std": 0.2266282932832837, |
| "step": 57344 |
| }, |
| { |
| "epoch": 0.5960357139144986, |
| "eval_cos_loss": 0.44051590468734503, |
| "eval_loss": 1.5316696986556053, |
| "eval_mse_loss": 1.3114117458462715, |
| "eval_runtime": 2.9501, |
| "eval_samples_per_second": 677.94, |
| "eval_steps_per_second": 10.847, |
| "flow/cos_sim": 0.5594841903075576, |
| "flow/improvement_ratio": 0.9659801628440619, |
| "flow/mag_ratio_mean": 0.5374981416389346, |
| "flow/mag_ratio_std": 0.2266282932832837, |
| "step": 57344 |
| }, |
| { |
| "epoch": 0.6066792088058289, |
| "grad_norm": 0.19920258224010468, |
| "learning_rate": 0.00035526270682447326, |
| "loss": 1.546189785003662, |
| "step": 58368 |
| }, |
| { |
| "epoch": 0.6066792088058289, |
| "eval_cos_loss": 0.45052160415798426, |
| "eval_loss": 1.5585628859698772, |
| "eval_mse_loss": 1.333302080631256, |
| "flow/cos_sim": 0.5494783949106932, |
| "flow/improvement_ratio": 0.9626638870686293, |
| "flow/mag_ratio_mean": 0.5275079058483243, |
| "flow/mag_ratio_std": 0.22685475973412395, |
| "step": 58368 |
| }, |
| { |
| "epoch": 0.6066792088058289, |
| "eval_cos_loss": 0.45052160415798426, |
| "eval_loss": 1.5585628859698772, |
| "eval_mse_loss": 1.333302080631256, |
| "eval_runtime": 2.5748, |
| "eval_samples_per_second": 776.758, |
| "eval_steps_per_second": 12.428, |
| "flow/cos_sim": 0.5494783949106932, |
| "flow/improvement_ratio": 0.9626638870686293, |
| "flow/mag_ratio_mean": 0.5275079058483243, |
| "flow/mag_ratio_std": 0.22685475973412395, |
| "step": 58368 |
| }, |
| { |
| "epoch": 0.6173227036971594, |
| "grad_norm": 0.15236690640449524, |
| "learning_rate": 0.0003388374920626505, |
| "loss": 1.546614408493042, |
| "step": 59392 |
| }, |
| { |
| "epoch": 0.6173227036971594, |
| "eval_cos_loss": 0.44333774503320456, |
| "eval_loss": 1.5386833399534225, |
| "eval_mse_loss": 1.3170144706964493, |
| "flow/cos_sim": 0.5566621888428926, |
| "flow/improvement_ratio": 0.9644087161868811, |
| "flow/mag_ratio_mean": 0.536553805693984, |
| "flow/mag_ratio_std": 0.22319983318448067, |
| "step": 59392 |
| }, |
| { |
| "epoch": 0.6173227036971594, |
| "eval_cos_loss": 0.44333774503320456, |
| "eval_loss": 1.5386833399534225, |
| "eval_mse_loss": 1.3170144706964493, |
| "eval_runtime": 2.5638, |
| "eval_samples_per_second": 780.079, |
| "eval_steps_per_second": 12.481, |
| "flow/cos_sim": 0.5566621888428926, |
| "flow/improvement_ratio": 0.9644087161868811, |
| "flow/mag_ratio_mean": 0.536553805693984, |
| "flow/mag_ratio_std": 0.22319983318448067, |
| "step": 59392 |
| }, |
| { |
| "epoch": 0.6279661985884897, |
| "grad_norm": 0.20959317684173584, |
| "learning_rate": 0.0003225883881381223, |
| "loss": 1.5455403327941895, |
| "step": 60416 |
| }, |
| { |
| "epoch": 0.6279661985884897, |
| "eval_cos_loss": 0.4418268231675029, |
| "eval_loss": 1.5369167998433113, |
| "eval_mse_loss": 1.3160033896565437, |
| "flow/cos_sim": 0.5581732373684645, |
| "flow/improvement_ratio": 0.9641634412109852, |
| "flow/mag_ratio_mean": 0.5371082350611687, |
| "flow/mag_ratio_std": 0.22345659835264087, |
| "step": 60416 |
| }, |
| { |
| "epoch": 0.6279661985884897, |
| "eval_cos_loss": 0.4418268231675029, |
| "eval_loss": 1.5369167998433113, |
| "eval_mse_loss": 1.3160033896565437, |
| "eval_runtime": 3.0215, |
| "eval_samples_per_second": 661.926, |
| "eval_steps_per_second": 10.591, |
| "flow/cos_sim": 0.5581732373684645, |
| "flow/improvement_ratio": 0.9641634412109852, |
| "flow/mag_ratio_mean": 0.5371082350611687, |
| "flow/mag_ratio_std": 0.22345659835264087, |
| "step": 60416 |
| }, |
| { |
| "epoch": 0.63860969347982, |
| "grad_norm": 0.14656518399715424, |
| "learning_rate": 0.00030656647450995957, |
| "loss": 1.5472609996795654, |
| "step": 61440 |
| }, |
| { |
| "epoch": 0.63860969347982, |
| "eval_cos_loss": 0.4388773338869214, |
| "eval_loss": 1.5277933366596699, |
| "eval_mse_loss": 1.308354664593935, |
| "flow/cos_sim": 0.5611227098852396, |
| "flow/improvement_ratio": 0.9615725018084049, |
| "flow/mag_ratio_mean": 0.5404465068131685, |
| "flow/mag_ratio_std": 0.22536235908046365, |
| "step": 61440 |
| }, |
| { |
| "epoch": 0.63860969347982, |
| "eval_cos_loss": 0.4388773338869214, |
| "eval_loss": 1.5277933366596699, |
| "eval_mse_loss": 1.308354664593935, |
| "eval_runtime": 2.725, |
| "eval_samples_per_second": 733.933, |
| "eval_steps_per_second": 11.743, |
| "flow/cos_sim": 0.5611227098852396, |
| "flow/improvement_ratio": 0.9615725018084049, |
| "flow/mag_ratio_mean": 0.5404465068131685, |
| "flow/mag_ratio_std": 0.22536235908046365, |
| "step": 61440 |
| }, |
| { |
| "epoch": 0.6492531883711503, |
| "grad_norm": 0.16984502971172333, |
| "learning_rate": 0.0002907595437968689, |
| "loss": 1.546900987625122, |
| "step": 62464 |
| }, |
| { |
| "epoch": 0.6492531883711503, |
| "eval_cos_loss": 0.44708416890352964, |
| "eval_loss": 1.550130132585764, |
| "eval_mse_loss": 1.3265880458056927, |
| "flow/cos_sim": 0.5529158180579543, |
| "flow/improvement_ratio": 0.964706290513277, |
| "flow/mag_ratio_mean": 0.5327735636383295, |
| "flow/mag_ratio_std": 0.22531110560521483, |
| "step": 62464 |
| }, |
| { |
| "epoch": 0.6492531883711503, |
| "eval_cos_loss": 0.44708416890352964, |
| "eval_loss": 1.550130132585764, |
| "eval_mse_loss": 1.3265880458056927, |
| "eval_runtime": 2.9226, |
| "eval_samples_per_second": 684.317, |
| "eval_steps_per_second": 10.949, |
| "flow/cos_sim": 0.5529158180579543, |
| "flow/improvement_ratio": 0.964706290513277, |
| "flow/mag_ratio_mean": 0.5327735636383295, |
| "flow/mag_ratio_std": 0.22531110560521483, |
| "step": 62464 |
| }, |
| { |
| "epoch": 0.6598966832624806, |
| "grad_norm": 0.16193066537380219, |
| "learning_rate": 0.00027521728547552225, |
| "loss": 1.5464342832565308, |
| "step": 63488 |
| }, |
| { |
| "epoch": 0.6598966832624806, |
| "eval_cos_loss": 0.44685271102935076, |
| "eval_loss": 1.5440139174461365, |
| "eval_mse_loss": 1.3205875717103481, |
| "flow/cos_sim": 0.5531473197042942, |
| "flow/improvement_ratio": 0.9620692692697048, |
| "flow/mag_ratio_mean": 0.5350175518542528, |
| "flow/mag_ratio_std": 0.23371326178312302, |
| "step": 63488 |
| }, |
| { |
| "epoch": 0.6598966832624806, |
| "eval_cos_loss": 0.44685271102935076, |
| "eval_loss": 1.5440139174461365, |
| "eval_mse_loss": 1.3205875717103481, |
| "eval_runtime": 2.588, |
| "eval_samples_per_second": 772.809, |
| "eval_steps_per_second": 12.365, |
| "flow/cos_sim": 0.5531473197042942, |
| "flow/improvement_ratio": 0.9620692692697048, |
| "flow/mag_ratio_mean": 0.5350175518542528, |
| "flow/mag_ratio_std": 0.23371326178312302, |
| "step": 63488 |
| }, |
| { |
| "epoch": 0.670540178153811, |
| "grad_norm": 0.15779773890972137, |
| "learning_rate": 0.00025992785762254565, |
| "loss": 1.545114278793335, |
| "step": 64512 |
| }, |
| { |
| "epoch": 0.670540178153811, |
| "eval_cos_loss": 0.4412845829501748, |
| "eval_loss": 1.5324720852077007, |
| "eval_mse_loss": 1.311829797923565, |
| "flow/cos_sim": 0.5587154757231474, |
| "flow/improvement_ratio": 0.9654579609632492, |
| "flow/mag_ratio_mean": 0.5386016722768545, |
| "flow/mag_ratio_std": 0.22591237584128976, |
| "step": 64512 |
| }, |
| { |
| "epoch": 0.670540178153811, |
| "eval_cos_loss": 0.4412845829501748, |
| "eval_loss": 1.5324720852077007, |
| "eval_mse_loss": 1.311829797923565, |
| "eval_runtime": 2.5359, |
| "eval_samples_per_second": 788.675, |
| "eval_steps_per_second": 12.619, |
| "flow/cos_sim": 0.5587154757231474, |
| "flow/improvement_ratio": 0.9654579609632492, |
| "flow/mag_ratio_mean": 0.5386016722768545, |
| "flow/mag_ratio_std": 0.22591237584128976, |
| "step": 64512 |
| }, |
| { |
| "epoch": 0.6811836730451413, |
| "grad_norm": 0.13823935389518738, |
| "learning_rate": 0.00024493932293160253, |
| "loss": 1.5480190515518188, |
| "step": 65536 |
| }, |
| { |
| "epoch": 0.6811836730451413, |
| "eval_cos_loss": 0.441369004547596, |
| "eval_loss": 1.5341729335486889, |
| "eval_mse_loss": 1.3134884350001812, |
| "flow/cos_sim": 0.5586309880018234, |
| "flow/improvement_ratio": 0.9630204811692238, |
| "flow/mag_ratio_mean": 0.5338549390435219, |
| "flow/mag_ratio_std": 0.2280183294788003, |
| "step": 65536 |
| }, |
| { |
| "epoch": 0.6811836730451413, |
| "eval_cos_loss": 0.441369004547596, |
| "eval_loss": 1.5341729335486889, |
| "eval_mse_loss": 1.3134884350001812, |
| "eval_runtime": 2.5158, |
| "eval_samples_per_second": 794.978, |
| "eval_steps_per_second": 12.72, |
| "flow/cos_sim": 0.5586309880018234, |
| "flow/improvement_ratio": 0.9630204811692238, |
| "flow/mag_ratio_mean": 0.5338549390435219, |
| "flow/mag_ratio_std": 0.2280183294788003, |
| "step": 65536 |
| }, |
| { |
| "epoch": 0.6918271679364716, |
| "grad_norm": 0.15880072116851807, |
| "learning_rate": 0.00023024026137121335, |
| "loss": 1.5464882850646973, |
| "step": 66560 |
| }, |
| { |
| "epoch": 0.6918271679364716, |
| "eval_cos_loss": 0.4457838609814644, |
| "eval_loss": 1.543992355465889, |
| "eval_mse_loss": 1.3211004212498665, |
| "flow/cos_sim": 0.554216225631535, |
| "flow/improvement_ratio": 0.9604913741350174, |
| "flow/mag_ratio_mean": 0.5307466676458716, |
| "flow/mag_ratio_std": 0.22845542011782527, |
| "step": 66560 |
| }, |
| { |
| "epoch": 0.6918271679364716, |
| "eval_cos_loss": 0.4457838609814644, |
| "eval_loss": 1.543992355465889, |
| "eval_mse_loss": 1.3211004212498665, |
| "eval_runtime": 2.821, |
| "eval_samples_per_second": 708.957, |
| "eval_steps_per_second": 11.343, |
| "flow/cos_sim": 0.554216225631535, |
| "flow/improvement_ratio": 0.9604913741350174, |
| "flow/mag_ratio_mean": 0.5307466676458716, |
| "flow/mag_ratio_std": 0.22845542011782527, |
| "step": 66560 |
| }, |
| { |
| "epoch": 0.702470662827802, |
| "grad_norm": 0.16780522465705872, |
| "learning_rate": 0.0002158768798040357, |
| "loss": 1.5465317964553833, |
| "step": 67584 |
| }, |
| { |
| "epoch": 0.702470662827802, |
| "eval_cos_loss": 0.4425074281170964, |
| "eval_loss": 1.5348311252892017, |
| "eval_mse_loss": 1.3135774172842503, |
| "flow/cos_sim": 0.557492695748806, |
| "flow/improvement_ratio": 0.9596080742776394, |
| "flow/mag_ratio_mean": 0.5359784308820963, |
| "flow/mag_ratio_std": 0.22894051391631365, |
| "step": 67584 |
| }, |
| { |
| "epoch": 0.702470662827802, |
| "eval_cos_loss": 0.4425074281170964, |
| "eval_loss": 1.5348311252892017, |
| "eval_mse_loss": 1.3135774172842503, |
| "eval_runtime": 3.1093, |
| "eval_samples_per_second": 643.239, |
| "eval_steps_per_second": 10.292, |
| "flow/cos_sim": 0.557492695748806, |
| "flow/improvement_ratio": 0.9596080742776394, |
| "flow/mag_ratio_mean": 0.5359784308820963, |
| "flow/mag_ratio_std": 0.22894051391631365, |
| "step": 67584 |
| }, |
| { |
| "epoch": 0.7131141577191323, |
| "grad_norm": 0.14971283078193665, |
| "learning_rate": 0.0002018382345138871, |
| "loss": 1.5448497533798218, |
| "step": 68608 |
| }, |
| { |
| "epoch": 0.7131141577191323, |
| "eval_cos_loss": 0.4460556413978338, |
| "eval_loss": 1.544582311064005, |
| "eval_mse_loss": 1.3215544857084751, |
| "flow/cos_sim": 0.5539443735033274, |
| "flow/improvement_ratio": 0.9611221943050623, |
| "flow/mag_ratio_mean": 0.5354085844010115, |
| "flow/mag_ratio_std": 0.23012477485463023, |
| "step": 68608 |
| }, |
| { |
| "epoch": 0.7131141577191323, |
| "eval_cos_loss": 0.4460556413978338, |
| "eval_loss": 1.544582311064005, |
| "eval_mse_loss": 1.3215544857084751, |
| "eval_runtime": 2.5817, |
| "eval_samples_per_second": 774.689, |
| "eval_steps_per_second": 12.395, |
| "flow/cos_sim": 0.5539443735033274, |
| "flow/improvement_ratio": 0.9611221943050623, |
| "flow/mag_ratio_mean": 0.5354085844010115, |
| "flow/mag_ratio_std": 0.23012477485463023, |
| "step": 68608 |
| }, |
| { |
| "epoch": 0.7237576526104627, |
| "grad_norm": 0.14716410636901855, |
| "learning_rate": 0.00018816845632858647, |
| "loss": 1.5454319715499878, |
| "step": 69632 |
| }, |
| { |
| "epoch": 0.7237576526104627, |
| "eval_cos_loss": 0.4387012077495456, |
| "eval_loss": 1.527622751891613, |
| "eval_mse_loss": 1.3082721531391144, |
| "flow/cos_sim": 0.5612988974899054, |
| "flow/improvement_ratio": 0.9638887159526348, |
| "flow/mag_ratio_mean": 0.5365499863401055, |
| "flow/mag_ratio_std": 0.2257627071812749, |
| "step": 69632 |
| }, |
| { |
| "epoch": 0.7237576526104627, |
| "eval_cos_loss": 0.4387012077495456, |
| "eval_loss": 1.527622751891613, |
| "eval_mse_loss": 1.3082721531391144, |
| "eval_runtime": 2.5421, |
| "eval_samples_per_second": 786.736, |
| "eval_steps_per_second": 12.588, |
| "flow/cos_sim": 0.5612988974899054, |
| "flow/improvement_ratio": 0.9638887159526348, |
| "flow/mag_ratio_mean": 0.5365499863401055, |
| "flow/mag_ratio_std": 0.2257627071812749, |
| "step": 69632 |
| }, |
| { |
| "epoch": 0.734401147501793, |
| "grad_norm": 0.1667480319738388, |
| "learning_rate": 0.00017485713000071836, |
| "loss": 1.5442575216293335, |
| "step": 70656 |
| }, |
| { |
| "epoch": 0.734401147501793, |
| "eval_cos_loss": 0.44433039985597134, |
| "eval_loss": 1.540200136601925, |
| "eval_mse_loss": 1.3180349357426167, |
| "flow/cos_sim": 0.5556696448475122, |
| "flow/improvement_ratio": 0.9625885803252459, |
| "flow/mag_ratio_mean": 0.5337581913918257, |
| "flow/mag_ratio_std": 0.22786249266937375, |
| "step": 70656 |
| }, |
| { |
| "epoch": 0.734401147501793, |
| "eval_cos_loss": 0.44433039985597134, |
| "eval_loss": 1.540200136601925, |
| "eval_mse_loss": 1.3180349357426167, |
| "eval_runtime": 2.5514, |
| "eval_samples_per_second": 783.877, |
| "eval_steps_per_second": 12.542, |
| "flow/cos_sim": 0.5556696448475122, |
| "flow/improvement_ratio": 0.9625885803252459, |
| "flow/mag_ratio_mean": 0.5337581913918257, |
| "flow/mag_ratio_std": 0.22786249266937375, |
| "step": 70656 |
| }, |
| { |
| "epoch": 0.7450446423931233, |
| "grad_norm": 0.21780510246753693, |
| "learning_rate": 0.00016194610001300043, |
| "loss": 1.5446763038635254, |
| "step": 71680 |
| }, |
| { |
| "epoch": 0.7450446423931233, |
| "eval_cos_loss": 0.44072897639125586, |
| "eval_loss": 1.53380636125803, |
| "eval_mse_loss": 1.3134418688714504, |
| "flow/cos_sim": 0.559271028265357, |
| "flow/improvement_ratio": 0.9642956424504519, |
| "flow/mag_ratio_mean": 0.5349069200456142, |
| "flow/mag_ratio_std": 0.22263477742671967, |
| "step": 71680 |
| }, |
| { |
| "epoch": 0.7450446423931233, |
| "eval_cos_loss": 0.44072897639125586, |
| "eval_loss": 1.53380636125803, |
| "eval_mse_loss": 1.3134418688714504, |
| "eval_runtime": 2.544, |
| "eval_samples_per_second": 786.162, |
| "eval_steps_per_second": 12.579, |
| "flow/cos_sim": 0.559271028265357, |
| "flow/improvement_ratio": 0.9642956424504519, |
| "flow/mag_ratio_mean": 0.5349069200456142, |
| "flow/mag_ratio_std": 0.22263477742671967, |
| "step": 71680 |
| }, |
| { |
| "epoch": 0.7556881372844536, |
| "grad_norm": 0.21069595217704773, |
| "learning_rate": 0.0001494255292217801, |
| "loss": 1.544022560119629, |
| "step": 72704 |
| }, |
| { |
| "epoch": 0.7556881372844536, |
| "eval_cos_loss": 0.44525754544883966, |
| "eval_loss": 1.5455412901937962, |
| "eval_mse_loss": 1.322912521660328, |
| "flow/cos_sim": 0.5547424238175154, |
| "flow/improvement_ratio": 0.9653947055339813, |
| "flow/mag_ratio_mean": 0.5336724892258644, |
| "flow/mag_ratio_std": 0.22635432425886393, |
| "step": 72704 |
| }, |
| { |
| "epoch": 0.7556881372844536, |
| "eval_cos_loss": 0.44525754544883966, |
| "eval_loss": 1.5455412901937962, |
| "eval_mse_loss": 1.322912521660328, |
| "eval_runtime": 2.8381, |
| "eval_samples_per_second": 704.708, |
| "eval_steps_per_second": 11.275, |
| "flow/cos_sim": 0.5547424238175154, |
| "flow/improvement_ratio": 0.9653947055339813, |
| "flow/mag_ratio_mean": 0.5336724892258644, |
| "flow/mag_ratio_std": 0.22635432425886393, |
| "step": 72704 |
| }, |
| { |
| "epoch": 0.7663316321757839, |
| "grad_norm": 0.13283193111419678, |
| "learning_rate": 0.0001373347763502365, |
| "loss": 1.545114517211914, |
| "step": 73728 |
| }, |
| { |
| "epoch": 0.7663316321757839, |
| "eval_cos_loss": 0.44415116030722857, |
| "eval_loss": 1.5426055118441582, |
| "eval_mse_loss": 1.320529941469431, |
| "flow/cos_sim": 0.5558489598333836, |
| "flow/improvement_ratio": 0.960325175896287, |
| "flow/mag_ratio_mean": 0.5346721205860376, |
| "flow/mag_ratio_std": 0.2243386204354465, |
| "step": 73728 |
| }, |
| { |
| "epoch": 0.7663316321757839, |
| "eval_cos_loss": 0.44415116030722857, |
| "eval_loss": 1.5426055118441582, |
| "eval_mse_loss": 1.320529941469431, |
| "eval_runtime": 2.5354, |
| "eval_samples_per_second": 788.834, |
| "eval_steps_per_second": 12.621, |
| "flow/cos_sim": 0.5558489598333836, |
| "flow/improvement_ratio": 0.960325175896287, |
| "flow/mag_ratio_mean": 0.5346721205860376, |
| "flow/mag_ratio_std": 0.2243386204354465, |
| "step": 73728 |
| }, |
| { |
| "epoch": 0.7769751270671144, |
| "grad_norm": 0.13968615233898163, |
| "learning_rate": 0.00012566462923845807, |
| "loss": 1.5433732271194458, |
| "step": 74752 |
| }, |
| { |
| "epoch": 0.7769751270671144, |
| "eval_cos_loss": 0.44200514908879995, |
| "eval_loss": 1.5366779565811157, |
| "eval_mse_loss": 1.3156753852963448, |
| "flow/cos_sim": 0.5579948425292969, |
| "flow/improvement_ratio": 0.9636132828891277, |
| "flow/mag_ratio_mean": 0.5333766378462315, |
| "flow/mag_ratio_std": 0.22553266119211912, |
| "step": 74752 |
| }, |
| { |
| "epoch": 0.7769751270671144, |
| "eval_cos_loss": 0.44200514908879995, |
| "eval_loss": 1.5366779565811157, |
| "eval_mse_loss": 1.3156753852963448, |
| "eval_runtime": 2.4816, |
| "eval_samples_per_second": 805.947, |
| "eval_steps_per_second": 12.895, |
| "flow/cos_sim": 0.5579948425292969, |
| "flow/improvement_ratio": 0.9636132828891277, |
| "flow/mag_ratio_mean": 0.5333766378462315, |
| "flow/mag_ratio_std": 0.22553266119211912, |
| "step": 74752 |
| }, |
| { |
| "epoch": 0.7876186219584447, |
| "grad_norm": 0.22951993346214294, |
| "learning_rate": 0.00011445177328180711, |
| "loss": 1.5437688827514648, |
| "step": 75776 |
| }, |
| { |
| "epoch": 0.7876186219584447, |
| "eval_cos_loss": 0.44082553684711456, |
| "eval_loss": 1.5322747267782688, |
| "eval_mse_loss": 1.3118619658052921, |
| "flow/cos_sim": 0.5591745115816593, |
| "flow/improvement_ratio": 0.9597889501601458, |
| "flow/mag_ratio_mean": 0.5374172441661358, |
| "flow/mag_ratio_std": 0.22841465286910534, |
| "step": 75776 |
| }, |
| { |
| "epoch": 0.7876186219584447, |
| "eval_cos_loss": 0.44082553684711456, |
| "eval_loss": 1.5322747267782688, |
| "eval_mse_loss": 1.3118619658052921, |
| "eval_runtime": 2.5567, |
| "eval_samples_per_second": 782.259, |
| "eval_steps_per_second": 12.516, |
| "flow/cos_sim": 0.5591745115816593, |
| "flow/improvement_ratio": 0.9597889501601458, |
| "flow/mag_ratio_mean": 0.5374172441661358, |
| "flow/mag_ratio_std": 0.22841465286910534, |
| "step": 75776 |
| }, |
| { |
| "epoch": 0.798262116849775, |
| "grad_norm": 0.15128082036972046, |
| "learning_rate": 0.00010369794845541591, |
| "loss": 1.545216679573059, |
| "step": 76800 |
| }, |
| { |
| "epoch": 0.798262116849775, |
| "eval_cos_loss": 0.4396012471988797, |
| "eval_loss": 1.5277335830032825, |
| "eval_mse_loss": 1.3079329580068588, |
| "flow/cos_sim": 0.5603987323120236, |
| "flow/improvement_ratio": 0.9618115201592445, |
| "flow/mag_ratio_mean": 0.5371274519711733, |
| "flow/mag_ratio_std": 0.22797544300556183, |
| "step": 76800 |
| }, |
| { |
| "epoch": 0.798262116849775, |
| "eval_cos_loss": 0.4396012471988797, |
| "eval_loss": 1.5277335830032825, |
| "eval_mse_loss": 1.3079329580068588, |
| "eval_runtime": 2.4926, |
| "eval_samples_per_second": 802.376, |
| "eval_steps_per_second": 12.838, |
| "flow/cos_sim": 0.5603987323120236, |
| "flow/improvement_ratio": 0.9618115201592445, |
| "flow/mag_ratio_mean": 0.5371274519711733, |
| "flow/mag_ratio_std": 0.22797544300556183, |
| "step": 76800 |
| }, |
| { |
| "epoch": 0.8089056117411053, |
| "grad_norm": 0.15258397161960602, |
| "learning_rate": 9.340614224932947e-05, |
| "loss": 1.5437705516815186, |
| "step": 77824 |
| }, |
| { |
| "epoch": 0.8089056117411053, |
| "eval_cos_loss": 0.4406488761305809, |
| "eval_loss": 1.5305031947791576, |
| "eval_mse_loss": 1.3101787567138672, |
| "flow/cos_sim": 0.5593511275947094, |
| "flow/improvement_ratio": 0.9616372548043728, |
| "flow/mag_ratio_mean": 0.5346422707661986, |
| "flow/mag_ratio_std": 0.22846621181815863, |
| "step": 77824 |
| }, |
| { |
| "epoch": 0.8089056117411053, |
| "eval_cos_loss": 0.4406488761305809, |
| "eval_loss": 1.5305031947791576, |
| "eval_mse_loss": 1.3101787567138672, |
| "eval_runtime": 2.5281, |
| "eval_samples_per_second": 791.111, |
| "eval_steps_per_second": 12.658, |
| "flow/cos_sim": 0.5593511275947094, |
| "flow/improvement_ratio": 0.9616372548043728, |
| "flow/mag_ratio_mean": 0.5346422707661986, |
| "flow/mag_ratio_std": 0.22846621181815863, |
| "step": 77824 |
| }, |
| { |
| "epoch": 0.8195491066324356, |
| "grad_norm": 0.15012474358081818, |
| "learning_rate": 8.359937073738122e-05, |
| "loss": 1.5427945852279663, |
| "step": 78848 |
| }, |
| { |
| "epoch": 0.8195491066324356, |
| "eval_cos_loss": 0.4398349104449153, |
| "eval_loss": 1.5277978368103504, |
| "eval_mse_loss": 1.3078803904354572, |
| "flow/cos_sim": 0.5601652916520834, |
| "flow/improvement_ratio": 0.9623467661440372, |
| "flow/mag_ratio_mean": 0.5370206441730261, |
| "flow/mag_ratio_std": 0.2306741690263152, |
| "step": 78848 |
| }, |
| { |
| "epoch": 0.8195491066324356, |
| "eval_cos_loss": 0.4398349104449153, |
| "eval_loss": 1.5277978368103504, |
| "eval_mse_loss": 1.3078803904354572, |
| "eval_runtime": 2.9061, |
| "eval_samples_per_second": 688.21, |
| "eval_steps_per_second": 11.011, |
| "flow/cos_sim": 0.5601652916520834, |
| "flow/improvement_ratio": 0.9623467661440372, |
| "flow/mag_ratio_mean": 0.5370206441730261, |
| "flow/mag_ratio_std": 0.2306741690263152, |
| "step": 78848 |
| }, |
| { |
| "epoch": 0.830192601523766, |
| "grad_norm": 0.14434155821800232, |
| "learning_rate": 7.429817849801124e-05, |
| "loss": 1.5424107313156128, |
| "step": 79872 |
| }, |
| { |
| "epoch": 0.830192601523766, |
| "eval_cos_loss": 0.440770055167377, |
| "eval_loss": 1.5335130989551544, |
| "eval_mse_loss": 1.31312807649374, |
| "flow/cos_sim": 0.5592298936098814, |
| "flow/improvement_ratio": 0.9679525289684534, |
| "flow/mag_ratio_mean": 0.5366982752457261, |
| "flow/mag_ratio_std": 0.22420579148456454, |
| "step": 79872 |
| }, |
| { |
| "epoch": 0.830192601523766, |
| "eval_cos_loss": 0.440770055167377, |
| "eval_loss": 1.5335130989551544, |
| "eval_mse_loss": 1.31312807649374, |
| "eval_runtime": 2.4978, |
| "eval_samples_per_second": 800.693, |
| "eval_steps_per_second": 12.811, |
| "flow/cos_sim": 0.5592298936098814, |
| "flow/improvement_ratio": 0.9679525289684534, |
| "flow/mag_ratio_mean": 0.5366982752457261, |
| "flow/mag_ratio_std": 0.22420579148456454, |
| "step": 79872 |
| }, |
| { |
| "epoch": 0.8408360964150963, |
| "grad_norm": 0.14048728346824646, |
| "learning_rate": 6.54954787872275e-05, |
| "loss": 1.5442392826080322, |
| "step": 80896 |
| }, |
| { |
| "epoch": 0.8408360964150963, |
| "eval_cos_loss": 0.44281749427318573, |
| "eval_loss": 1.5380274765193462, |
| "eval_mse_loss": 1.3166187293827534, |
| "flow/cos_sim": 0.5571825094521046, |
| "flow/improvement_ratio": 0.9661159794777632, |
| "flow/mag_ratio_mean": 0.5346010681241751, |
| "flow/mag_ratio_std": 0.22787420498207211, |
| "step": 80896 |
| }, |
| { |
| "epoch": 0.8408360964150963, |
| "eval_cos_loss": 0.44281749427318573, |
| "eval_loss": 1.5380274765193462, |
| "eval_mse_loss": 1.3166187293827534, |
| "eval_runtime": 2.5057, |
| "eval_samples_per_second": 798.189, |
| "eval_steps_per_second": 12.771, |
| "flow/cos_sim": 0.5571825094521046, |
| "flow/improvement_ratio": 0.9661159794777632, |
| "flow/mag_ratio_mean": 0.5346010681241751, |
| "flow/mag_ratio_std": 0.22787420498207211, |
| "step": 80896 |
| }, |
| { |
| "epoch": 0.8514795913064266, |
| "grad_norm": 0.15351833403110504, |
| "learning_rate": 5.7211109010678276e-05, |
| "loss": 1.5441709756851196, |
| "step": 81920 |
| }, |
| { |
| "epoch": 0.8514795913064266, |
| "eval_cos_loss": 0.43972852267324924, |
| "eval_loss": 1.5298539474606514, |
| "eval_mse_loss": 1.3099896907806396, |
| "flow/cos_sim": 0.5602715257555246, |
| "flow/improvement_ratio": 0.9654016513377428, |
| "flow/mag_ratio_mean": 0.5356091465801001, |
| "flow/mag_ratio_std": 0.22713992185890675, |
| "step": 81920 |
| }, |
| { |
| "epoch": 0.8514795913064266, |
| "eval_cos_loss": 0.43972852267324924, |
| "eval_loss": 1.5298539474606514, |
| "eval_mse_loss": 1.3099896907806396, |
| "eval_runtime": 3.0577, |
| "eval_samples_per_second": 654.077, |
| "eval_steps_per_second": 10.465, |
| "flow/cos_sim": 0.5602715257555246, |
| "flow/improvement_ratio": 0.9654016513377428, |
| "flow/mag_ratio_mean": 0.5356091465801001, |
| "flow/mag_ratio_std": 0.22713992185890675, |
| "step": 81920 |
| }, |
| { |
| "epoch": 0.862123086197757, |
| "grad_norm": 0.14501118659973145, |
| "learning_rate": 4.946226542264676e-05, |
| "loss": 1.5440622568130493, |
| "step": 82944 |
| }, |
| { |
| "epoch": 0.862123086197757, |
| "eval_cos_loss": 0.4398494055494666, |
| "eval_loss": 1.5285302698612213, |
| "eval_mse_loss": 1.3086055591702461, |
| "flow/cos_sim": 0.5601506568491459, |
| "flow/improvement_ratio": 0.9636457152664661, |
| "flow/mag_ratio_mean": 0.5373753281310201, |
| "flow/mag_ratio_std": 0.22595488466322422, |
| "step": 82944 |
| }, |
| { |
| "epoch": 0.862123086197757, |
| "eval_cos_loss": 0.4398494055494666, |
| "eval_loss": 1.5285302698612213, |
| "eval_mse_loss": 1.3086055591702461, |
| "eval_runtime": 2.6735, |
| "eval_samples_per_second": 748.087, |
| "eval_steps_per_second": 11.969, |
| "flow/cos_sim": 0.5601506568491459, |
| "flow/improvement_ratio": 0.9636457152664661, |
| "flow/mag_ratio_mean": 0.5373753281310201, |
| "flow/mag_ratio_std": 0.22595488466322422, |
| "step": 82944 |
| }, |
| { |
| "epoch": 0.8727665810890873, |
| "grad_norm": 0.1732260137796402, |
| "learning_rate": 4.2243044041220535e-05, |
| "loss": 1.5427820682525635, |
| "step": 83968 |
| }, |
| { |
| "epoch": 0.8727665810890873, |
| "eval_cos_loss": 0.43963075149804354, |
| "eval_loss": 1.5304800160229206, |
| "eval_mse_loss": 1.310664638876915, |
| "flow/cos_sim": 0.5603692829608917, |
| "flow/improvement_ratio": 0.9656369760632515, |
| "flow/mag_ratio_mean": 0.5373333236202598, |
| "flow/mag_ratio_std": 0.22598782274872065, |
| "step": 83968 |
| }, |
| { |
| "epoch": 0.8727665810890873, |
| "eval_cos_loss": 0.43963075149804354, |
| "eval_loss": 1.5304800160229206, |
| "eval_mse_loss": 1.310664638876915, |
| "eval_runtime": 2.6771, |
| "eval_samples_per_second": 747.086, |
| "eval_steps_per_second": 11.953, |
| "flow/cos_sim": 0.5603692829608917, |
| "flow/improvement_ratio": 0.9656369760632515, |
| "flow/mag_ratio_mean": 0.5373333236202598, |
| "flow/mag_ratio_std": 0.22598782274872065, |
| "step": 83968 |
| }, |
| { |
| "epoch": 0.8834100759804177, |
| "grad_norm": 0.1780228167772293, |
| "learning_rate": 3.556989093134011e-05, |
| "loss": 1.5418999195098877, |
| "step": 84992 |
| }, |
| { |
| "epoch": 0.8834100759804177, |
| "eval_cos_loss": 0.4417334571480751, |
| "eval_loss": 1.5349605418741703, |
| "eval_mse_loss": 1.3140938207507133, |
| "flow/cos_sim": 0.5582665763795376, |
| "flow/improvement_ratio": 0.9667846951633692, |
| "flow/mag_ratio_mean": 0.5365295764058828, |
| "flow/mag_ratio_std": 0.22713460819795728, |
| "step": 84992 |
| }, |
| { |
| "epoch": 0.8834100759804177, |
| "eval_cos_loss": 0.4417334571480751, |
| "eval_loss": 1.5349605418741703, |
| "eval_mse_loss": 1.3140938207507133, |
| "eval_runtime": 2.545, |
| "eval_samples_per_second": 785.844, |
| "eval_steps_per_second": 12.574, |
| "flow/cos_sim": 0.5582665763795376, |
| "flow/improvement_ratio": 0.9667846951633692, |
| "flow/mag_ratio_mean": 0.5365295764058828, |
| "flow/mag_ratio_std": 0.22713460819795728, |
| "step": 84992 |
| }, |
| { |
| "epoch": 0.894053570871748, |
| "grad_norm": 0.1589423269033432, |
| "learning_rate": 2.9450766643112025e-05, |
| "loss": 1.5413941144943237, |
| "step": 86016 |
| }, |
| { |
| "epoch": 0.894053570871748, |
| "eval_cos_loss": 0.4417336033657193, |
| "eval_loss": 1.5350174084305763, |
| "eval_mse_loss": 1.314150609076023, |
| "flow/cos_sim": 0.5582663975656033, |
| "flow/improvement_ratio": 0.9610403534024954, |
| "flow/mag_ratio_mean": 0.5371948201209307, |
| "flow/mag_ratio_std": 0.22706548869609833, |
| "step": 86016 |
| }, |
| { |
| "epoch": 0.894053570871748, |
| "eval_cos_loss": 0.4417336033657193, |
| "eval_loss": 1.5350174084305763, |
| "eval_mse_loss": 1.314150609076023, |
| "eval_runtime": 2.516, |
| "eval_samples_per_second": 794.928, |
| "eval_steps_per_second": 12.719, |
| "flow/cos_sim": 0.5582663975656033, |
| "flow/improvement_ratio": 0.9610403534024954, |
| "flow/mag_ratio_mean": 0.5371948201209307, |
| "flow/mag_ratio_std": 0.22706548869609833, |
| "step": 86016 |
| }, |
| { |
| "epoch": 0.9046970657630783, |
| "grad_norm": 0.13952629268169403, |
| "learning_rate": 2.3898122320136107e-05, |
| "loss": 1.5443463325500488, |
| "step": 87040 |
| }, |
| { |
| "epoch": 0.9046970657630783, |
| "eval_cos_loss": 0.4454851495102048, |
| "eval_loss": 1.5462815202772617, |
| "eval_mse_loss": 1.3235389403998852, |
| "flow/cos_sim": 0.5545149501413107, |
| "flow/improvement_ratio": 0.9633045084774494, |
| "flow/mag_ratio_mean": 0.5327793834730983, |
| "flow/mag_ratio_std": 0.22570591513067484, |
| "step": 87040 |
| }, |
| { |
| "epoch": 0.9046970657630783, |
| "eval_cos_loss": 0.4454851495102048, |
| "eval_loss": 1.5462815202772617, |
| "eval_mse_loss": 1.3235389403998852, |
| "eval_runtime": 3.0237, |
| "eval_samples_per_second": 661.435, |
| "eval_steps_per_second": 10.583, |
| "flow/cos_sim": 0.5545149501413107, |
| "flow/improvement_ratio": 0.9633045084774494, |
| "flow/mag_ratio_mean": 0.5327793834730983, |
| "flow/mag_ratio_std": 0.22570591513067484, |
| "step": 87040 |
| }, |
| { |
| "epoch": 0.9153405606544086, |
| "grad_norm": 0.1696067899465561, |
| "learning_rate": 1.8907727303827394e-05, |
| "loss": 1.5422346591949463, |
| "step": 88064 |
| }, |
| { |
| "epoch": 0.9153405606544086, |
| "eval_cos_loss": 0.4382868492975831, |
| "eval_loss": 1.5258447527885437, |
| "eval_mse_loss": 1.3067013174295425, |
| "flow/cos_sim": 0.561713146045804, |
| "flow/improvement_ratio": 0.9594295676797628, |
| "flow/mag_ratio_mean": 0.5373271182179451, |
| "flow/mag_ratio_std": 0.22556039178743958, |
| "step": 88064 |
| }, |
| { |
| "epoch": 0.9153405606544086, |
| "eval_cos_loss": 0.4382868492975831, |
| "eval_loss": 1.5258447527885437, |
| "eval_mse_loss": 1.3067013174295425, |
| "eval_runtime": 2.5211, |
| "eval_samples_per_second": 793.311, |
| "eval_steps_per_second": 12.693, |
| "flow/cos_sim": 0.561713146045804, |
| "flow/improvement_ratio": 0.9594295676797628, |
| "flow/mag_ratio_mean": 0.5373271182179451, |
| "flow/mag_ratio_std": 0.22556039178743958, |
| "step": 88064 |
| }, |
| { |
| "epoch": 0.9259840555457389, |
| "grad_norm": 0.15072369575500488, |
| "learning_rate": 1.4491237768113841e-05, |
| "loss": 1.542992353439331, |
| "step": 89088 |
| }, |
| { |
| "epoch": 0.9259840555457389, |
| "eval_cos_loss": 0.4420606214553118, |
| "eval_loss": 1.5344204120337963, |
| "eval_mse_loss": 1.3133900947868824, |
| "flow/cos_sim": 0.557939387857914, |
| "flow/improvement_ratio": 0.9637042284011841, |
| "flow/mag_ratio_mean": 0.5341464914381504, |
| "flow/mag_ratio_std": 0.22699455870315433, |
| "step": 89088 |
| }, |
| { |
| "epoch": 0.9259840555457389, |
| "eval_cos_loss": 0.4420606214553118, |
| "eval_loss": 1.5344204120337963, |
| "eval_mse_loss": 1.3133900947868824, |
| "eval_runtime": 2.613, |
| "eval_samples_per_second": 765.401, |
| "eval_steps_per_second": 12.246, |
| "flow/cos_sim": 0.557939387857914, |
| "flow/improvement_ratio": 0.9637042284011841, |
| "flow/mag_ratio_mean": 0.5341464914381504, |
| "flow/mag_ratio_std": 0.22699455870315433, |
| "step": 89088 |
| }, |
| { |
| "epoch": 0.9366275504370692, |
| "grad_norm": 0.15198417007923126, |
| "learning_rate": 1.065392223983186e-05, |
| "loss": 1.5422855615615845, |
| "step": 90112 |
| }, |
| { |
| "epoch": 0.9366275504370692, |
| "eval_cos_loss": 0.4458601539954543, |
| "eval_loss": 1.5451230816543102, |
| "eval_mse_loss": 1.3221930228173733, |
| "flow/cos_sim": 0.5541399177163839, |
| "flow/improvement_ratio": 0.958930304273963, |
| "flow/mag_ratio_mean": 0.5324209975078702, |
| "flow/mag_ratio_std": 0.22846948402002454, |
| "step": 90112 |
| }, |
| { |
| "epoch": 0.9366275504370692, |
| "eval_cos_loss": 0.4458601539954543, |
| "eval_loss": 1.5451230816543102, |
| "eval_mse_loss": 1.3221930228173733, |
| "eval_runtime": 3.0844, |
| "eval_samples_per_second": 648.42, |
| "eval_steps_per_second": 10.375, |
| "flow/cos_sim": 0.5541399177163839, |
| "flow/improvement_ratio": 0.958930304273963, |
| "flow/mag_ratio_mean": 0.5324209975078702, |
| "flow/mag_ratio_std": 0.22846948402002454, |
| "step": 90112 |
| }, |
| { |
| "epoch": 0.9472710453283997, |
| "grad_norm": 0.1283935010433197, |
| "learning_rate": 7.40324956991506e-06, |
| "loss": 1.5435974597930908, |
| "step": 91136 |
| }, |
| { |
| "epoch": 0.9472710453283997, |
| "eval_cos_loss": 0.4411879302933812, |
| "eval_loss": 1.5341791696846485, |
| "eval_mse_loss": 1.313585203140974, |
| "flow/cos_sim": 0.5588121470063925, |
| "flow/improvement_ratio": 0.9638758208602667, |
| "flow/mag_ratio_mean": 0.5351903941482306, |
| "flow/mag_ratio_std": 0.22519566072151065, |
| "step": 91136 |
| }, |
| { |
| "epoch": 0.9472710453283997, |
| "eval_cos_loss": 0.4411879302933812, |
| "eval_loss": 1.5341791696846485, |
| "eval_mse_loss": 1.313585203140974, |
| "eval_runtime": 2.6762, |
| "eval_samples_per_second": 747.335, |
| "eval_steps_per_second": 11.957, |
| "flow/cos_sim": 0.5588121470063925, |
| "flow/improvement_ratio": 0.9638758208602667, |
| "flow/mag_ratio_mean": 0.5351903941482306, |
| "flow/mag_ratio_std": 0.22519566072151065, |
| "step": 91136 |
| }, |
| { |
| "epoch": 0.95791454021973, |
| "grad_norm": 0.12574529647827148, |
| "learning_rate": 4.7367430129720004e-06, |
| "loss": 1.5435535907745361, |
| "step": 92160 |
| }, |
| { |
| "epoch": 0.95791454021973, |
| "eval_cos_loss": 0.44530233182013035, |
| "eval_loss": 1.5450147837400436, |
| "eval_mse_loss": 1.3223636075854301, |
| "flow/cos_sim": 0.5546976234763861, |
| "flow/improvement_ratio": 0.9661824498325586, |
| "flow/mag_ratio_mean": 0.5322676496580243, |
| "flow/mag_ratio_std": 0.22808025730773807, |
| "step": 92160 |
| }, |
| { |
| "epoch": 0.95791454021973, |
| "eval_cos_loss": 0.44530233182013035, |
| "eval_loss": 1.5450147837400436, |
| "eval_mse_loss": 1.3223636075854301, |
| "eval_runtime": 2.5148, |
| "eval_samples_per_second": 795.3, |
| "eval_steps_per_second": 12.725, |
| "flow/cos_sim": 0.5546976234763861, |
| "flow/improvement_ratio": 0.9661824498325586, |
| "flow/mag_ratio_mean": 0.5322676496580243, |
| "flow/mag_ratio_std": 0.22808025730773807, |
| "step": 92160 |
| }, |
| { |
| "epoch": 0.9685580351110603, |
| "grad_norm": 0.1402869075536728, |
| "learning_rate": 2.662784798150131e-06, |
| "loss": 1.5427674055099487, |
| "step": 93184 |
| }, |
| { |
| "epoch": 0.9685580351110603, |
| "eval_cos_loss": 0.44235736317932606, |
| "eval_loss": 1.536139328032732, |
| "eval_mse_loss": 1.3149606361985207, |
| "flow/cos_sim": 0.5576427038758993, |
| "flow/improvement_ratio": 0.9603242203593254, |
| "flow/mag_ratio_mean": 0.5352574419230223, |
| "flow/mag_ratio_std": 0.2266941787675023, |
| "step": 93184 |
| }, |
| { |
| "epoch": 0.9685580351110603, |
| "eval_cos_loss": 0.44235736317932606, |
| "eval_loss": 1.536139328032732, |
| "eval_mse_loss": 1.3149606361985207, |
| "eval_runtime": 2.565, |
| "eval_samples_per_second": 779.726, |
| "eval_steps_per_second": 12.476, |
| "flow/cos_sim": 0.5576427038758993, |
| "flow/improvement_ratio": 0.9603242203593254, |
| "flow/mag_ratio_mean": 0.5352574419230223, |
| "flow/mag_ratio_std": 0.2266941787675023, |
| "step": 93184 |
| }, |
| { |
| "epoch": 0.9792015300023906, |
| "grad_norm": 0.1338053047657013, |
| "learning_rate": 1.1797947397548802e-06, |
| "loss": 1.5422078371047974, |
| "step": 94208 |
| }, |
| { |
| "epoch": 0.9792015300023906, |
| "eval_cos_loss": 0.44181027822196484, |
| "eval_loss": 1.5355971939861774, |
| "eval_mse_loss": 1.3146920576691628, |
| "flow/cos_sim": 0.558189669623971, |
| "flow/improvement_ratio": 0.9626132287085056, |
| "flow/mag_ratio_mean": 0.5348945092409849, |
| "flow/mag_ratio_std": 0.22780301421880722, |
| "step": 94208 |
| }, |
| { |
| "epoch": 0.9792015300023906, |
| "eval_cos_loss": 0.44181027822196484, |
| "eval_loss": 1.5355971939861774, |
| "eval_mse_loss": 1.3146920576691628, |
| "eval_runtime": 2.4969, |
| "eval_samples_per_second": 800.987, |
| "eval_steps_per_second": 12.816, |
| "flow/cos_sim": 0.558189669623971, |
| "flow/improvement_ratio": 0.9626132287085056, |
| "flow/mag_ratio_mean": 0.5348945092409849, |
| "flow/mag_ratio_std": 0.22780301421880722, |
| "step": 94208 |
| }, |
| { |
| "epoch": 0.989845024893721, |
| "grad_norm": 0.12129372358322144, |
| "learning_rate": 2.9243465362199797e-07, |
| "loss": 1.541589379310608, |
| "step": 95232 |
| }, |
| { |
| "epoch": 0.989845024893721, |
| "eval_cos_loss": 0.44525294937193394, |
| "eval_loss": 1.5437418557703495, |
| "eval_mse_loss": 1.3211153745651245, |
| "flow/cos_sim": 0.5547471418976784, |
| "flow/improvement_ratio": 0.9627930391579866, |
| "flow/mag_ratio_mean": 0.5311954086646438, |
| "flow/mag_ratio_std": 0.2286191936582327, |
| "step": 95232 |
| }, |
| { |
| "epoch": 0.989845024893721, |
| "eval_cos_loss": 0.44525294937193394, |
| "eval_loss": 1.5437418557703495, |
| "eval_mse_loss": 1.3211153745651245, |
| "eval_runtime": 2.5197, |
| "eval_samples_per_second": 793.751, |
| "eval_steps_per_second": 12.7, |
| "flow/cos_sim": 0.5547471418976784, |
| "flow/improvement_ratio": 0.9627930391579866, |
| "flow/mag_ratio_mean": 0.5311954086646438, |
| "flow/mag_ratio_std": 0.2286191936582327, |
| "step": 95232 |
| } |
| ], |
| "logging_steps": 1024, |
| "max_steps": 96209, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|