| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.6148445799270241, |
| "eval_steps": 1024, |
| "global_step": 13312, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011823934229365849, |
| "grad_norm": 0.17750050127506256, |
| "learning_rate": 0.000498046875, |
| "loss": 1.7531359195709229, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.023647868458731697, |
| "grad_norm": 0.14886082708835602, |
| "learning_rate": 0.000998046875, |
| "loss": 1.120242953300476, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03547180268809755, |
| "grad_norm": 0.11476853489875793, |
| "learning_rate": 0.000999640996023194, |
| "loss": 1.0460094213485718, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "grad_norm": 0.26101624965667725, |
| "learning_rate": 0.0009985588674043958, |
| "loss": 1.0132286548614502, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "eval_cos_loss": 0.3147891739459887, |
| "eval_loss": 0.9696788661283989, |
| "eval_mse_loss": 0.9696788661283989, |
| "flow/cos_sim": 0.6852108316334415, |
| "flow/improvement_ratio": 0.4729253878332164, |
| "flow/mag_ratio_mean": 0.6834825821663146, |
| "flow/mag_ratio_std": 0.18169553397589078, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "eval_cos_loss": 0.3147891739459887, |
| "eval_loss": 0.9696788661283989, |
| "eval_mse_loss": 0.9696788661283989, |
| "eval_runtime": 37.5026, |
| "eval_samples_per_second": 746.427, |
| "eval_steps_per_second": 11.679, |
| "flow/cos_sim": 0.6852108316334415, |
| "flow/improvement_ratio": 0.4729253878332164, |
| "flow/mag_ratio_mean": 0.6834825821663146, |
| "flow/mag_ratio_std": 0.18169553397589078, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.05911967114682925, |
| "grad_norm": 0.32471492886543274, |
| "learning_rate": 0.0009967551747861387, |
| "loss": 0.9918397068977356, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.0709436053761951, |
| "grad_norm": 0.19580507278442383, |
| "learning_rate": 0.000994232528651847, |
| "loss": 0.9722790718078613, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.08276753960556095, |
| "grad_norm": 0.13594156503677368, |
| "learning_rate": 0.0009909945800260092, |
| "loss": 0.9547147750854492, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "grad_norm": 0.49703535437583923, |
| "learning_rate": 0.0009870460151900522, |
| "loss": 0.9414160847663879, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "eval_cos_loss": 0.2932011633430986, |
| "eval_loss": 0.8981472950547797, |
| "eval_mse_loss": 0.8981472950547797, |
| "flow/cos_sim": 0.7067988623767139, |
| "flow/improvement_ratio": 0.47438520363204556, |
| "flow/mag_ratio_mean": 0.6956081727868346, |
| "flow/mag_ratio_std": 0.19316728430115468, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "eval_cos_loss": 0.2932011633430986, |
| "eval_loss": 0.8981472950547797, |
| "eval_mse_loss": 0.8981472950547797, |
| "eval_runtime": 37.712, |
| "eval_samples_per_second": 742.284, |
| "eval_steps_per_second": 11.614, |
| "flow/cos_sim": 0.7067988623767139, |
| "flow/improvement_ratio": 0.47438520363204556, |
| "flow/mag_ratio_mean": 0.6956081727868346, |
| "flow/mag_ratio_std": 0.19316728430115468, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.10641540806429264, |
| "grad_norm": 0.49009642004966736, |
| "learning_rate": 0.0009823925488998885, |
| "loss": 0.929905116558075, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.1182393422936585, |
| "grad_norm": 0.21833102405071259, |
| "learning_rate": 0.0009770409161149525, |
| "loss": 0.9224135875701904, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.13006327652302435, |
| "grad_norm": 0.3465236723423004, |
| "learning_rate": 0.0009709988622506973, |
| "loss": 0.9120264649391174, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "grad_norm": 0.3057152032852173, |
| "learning_rate": 0.000964275131968659, |
| "loss": 0.9054349064826965, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "eval_cos_loss": 0.28495094285571954, |
| "eval_loss": 0.8664921860988826, |
| "eval_mse_loss": 0.8664921860988826, |
| "flow/cos_sim": 0.7150490750733032, |
| "flow/improvement_ratio": 0.48076002510715293, |
| "flow/mag_ratio_mean": 0.7135201504785721, |
| "flow/mag_ratio_std": 0.19565439009911392, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "eval_cos_loss": 0.28495094285571954, |
| "eval_loss": 0.8664921860988826, |
| "eval_mse_loss": 0.8664921860988826, |
| "eval_runtime": 38.3291, |
| "eval_samples_per_second": 730.333, |
| "eval_steps_per_second": 11.427, |
| "flow/cos_sim": 0.7150490750733032, |
| "flow/improvement_ratio": 0.48076002510715293, |
| "flow/mag_ratio_mean": 0.7135201504785721, |
| "flow/mag_ratio_std": 0.19565439009911392, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.15371114498175603, |
| "grad_norm": 0.599277138710022, |
| "learning_rate": 0.0009568794565203123, |
| "loss": 0.9003790020942688, |
| "step": 3328 |
| }, |
| { |
| "epoch": 0.1655350792111219, |
| "grad_norm": 0.6535385251045227, |
| "learning_rate": 0.0009488225396630347, |
| "loss": 0.897746205329895, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.17735901344048774, |
| "grad_norm": 0.4937800467014313, |
| "learning_rate": 0.0009401160421685646, |
| "loss": 0.8885043859481812, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "grad_norm": 1.3498679399490356, |
| "learning_rate": 0.0009307725649463714, |
| "loss": 0.8906658887863159, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "eval_cos_loss": 0.281152941626773, |
| "eval_loss": 0.8519824494237769, |
| "eval_mse_loss": 0.8519824494237769, |
| "flow/cos_sim": 0.718847086304399, |
| "flow/improvement_ratio": 0.47086378285601804, |
| "flow/mag_ratio_mean": 0.7225797281145505, |
| "flow/mag_ratio_std": 0.19704299981463447, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "eval_cos_loss": 0.281152941626773, |
| "eval_loss": 0.8519824494237769, |
| "eval_mse_loss": 0.8519824494237769, |
| "eval_runtime": 37.6235, |
| "eval_samples_per_second": 744.029, |
| "eval_steps_per_second": 11.642, |
| "flow/cos_sim": 0.718847086304399, |
| "flow/improvement_ratio": 0.47086378285601804, |
| "flow/mag_ratio_mean": 0.7225797281145505, |
| "flow/mag_ratio_std": 0.19704299981463447, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.20100688189921945, |
| "grad_norm": 0.24068038165569305, |
| "learning_rate": 0.0009208056308063659, |
| "loss": 0.885456383228302, |
| "step": 4352 |
| }, |
| { |
| "epoch": 0.2128308161285853, |
| "grad_norm": 0.33502769470214844, |
| "learning_rate": 0.0009102296648873445, |
| "loss": 0.8807857632637024, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.22465475035795113, |
| "grad_norm": 0.176764577627182, |
| "learning_rate": 0.0008990599737794927, |
| "loss": 0.8787178993225098, |
| "step": 4864 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "grad_norm": 0.1856887936592102, |
| "learning_rate": 0.0008873127233711644, |
| "loss": 0.8718249797821045, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "eval_cos_loss": 0.2779194979022627, |
| "eval_loss": 0.8412302969253227, |
| "eval_mse_loss": 0.8412302969253227, |
| "flow/cos_sim": 0.7220805322742898, |
| "flow/improvement_ratio": 0.4709661888749632, |
| "flow/mag_ratio_mean": 0.7200435751105008, |
| "flow/mag_ratio_std": 0.19833092303036554, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "eval_cos_loss": 0.2779194979022627, |
| "eval_loss": 0.8412302969253227, |
| "eval_mse_loss": 0.8412302969253227, |
| "eval_runtime": 37.8514, |
| "eval_samples_per_second": 739.549, |
| "eval_steps_per_second": 11.572, |
| "flow/cos_sim": 0.7220805322742898, |
| "flow/improvement_ratio": 0.4709661888749632, |
| "flow/mag_ratio_mean": 0.7200435751105008, |
| "flow/mag_ratio_std": 0.19833092303036554, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.24830261881668284, |
| "grad_norm": 0.315166711807251, |
| "learning_rate": 0.0008750049154520011, |
| "loss": 0.8694944977760315, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.2601265530460487, |
| "grad_norm": 0.6202583909034729, |
| "learning_rate": 0.0008621543631062487, |
| "loss": 0.8698850870132446, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.27195048727541454, |
| "grad_norm": 0.154885396361351, |
| "learning_rate": 0.0008487796649318904, |
| "loss": 0.866125762462616, |
| "step": 5888 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "grad_norm": 0.1593707948923111, |
| "learning_rate": 0.0008349001781229053, |
| "loss": 0.8656928539276123, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "eval_cos_loss": 0.27761552666556344, |
| "eval_loss": 0.8367355261763482, |
| "eval_mse_loss": 0.8367355261763482, |
| "flow/cos_sim": 0.7223844902428318, |
| "flow/improvement_ratio": 0.47543454585282224, |
| "flow/mag_ratio_mean": 0.7271437070685435, |
| "flow/mag_ratio_std": 0.2028201749063518, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "eval_cos_loss": 0.27761552666556344, |
| "eval_loss": 0.8367355261763482, |
| "eval_mse_loss": 0.8367355261763482, |
| "eval_runtime": 37.5912, |
| "eval_samples_per_second": 744.669, |
| "eval_steps_per_second": 11.652, |
| "flow/cos_sim": 0.7223844902428318, |
| "flow/improvement_ratio": 0.47543454585282224, |
| "flow/mag_ratio_mean": 0.7271437070685435, |
| "flow/mag_ratio_std": 0.2028201749063518, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2955983557341462, |
| "grad_norm": 0.24749045073986053, |
| "learning_rate": 0.0008205359904536107, |
| "loss": 0.8599100112915039, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.30742228996351206, |
| "grad_norm": 0.4366367757320404, |
| "learning_rate": 0.0008057078912056363, |
| "loss": 0.8599902987480164, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.3192462241928779, |
| "grad_norm": 0.2574009597301483, |
| "learning_rate": 0.0007904373410796086, |
| "loss": 0.8590140342712402, |
| "step": 6912 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "grad_norm": 0.24796369671821594, |
| "learning_rate": 0.0007747464411350876, |
| "loss": 0.8581823110580444, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "eval_cos_loss": 0.27323219904752627, |
| "eval_loss": 0.8229639839091801, |
| "eval_mse_loss": 0.8229639839091801, |
| "flow/cos_sim": 0.7267678196299566, |
| "flow/improvement_ratio": 0.4771028495814702, |
| "flow/mag_ratio_mean": 0.7125569941246346, |
| "flow/mag_ratio_std": 0.20383604319944773, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "eval_cos_loss": 0.27323219904752627, |
| "eval_loss": 0.8229639839091801, |
| "eval_mse_loss": 0.8229639839091801, |
| "eval_runtime": 37.8121, |
| "eval_samples_per_second": 740.319, |
| "eval_steps_per_second": 11.584, |
| "flow/cos_sim": 0.7267678196299566, |
| "flow/improvement_ratio": 0.4771028495814702, |
| "flow/mag_ratio_mean": 0.7125569941246346, |
| "flow/mag_ratio_std": 0.20383604319944773, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.34289409265160964, |
| "grad_norm": 0.7159921526908875, |
| "learning_rate": 0.000758657900803716, |
| "loss": 0.858252763748169, |
| "step": 7424 |
| }, |
| { |
| "epoch": 0.3547180268809755, |
| "grad_norm": 0.6132074594497681, |
| "learning_rate": 0.000742195005021869, |
| "loss": 0.8558468818664551, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.3665419611103413, |
| "grad_norm": 0.18163549900054932, |
| "learning_rate": 0.0007253815805303786, |
| "loss": 0.85396808385849, |
| "step": 7936 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "grad_norm": 0.572221040725708, |
| "learning_rate": 0.0007082419613901028, |
| "loss": 0.8530430197715759, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "eval_cos_loss": 0.2715048284427216, |
| "eval_loss": 0.8155939645691005, |
| "eval_mse_loss": 0.8155939645691005, |
| "flow/cos_sim": 0.7284952074153238, |
| "flow/improvement_ratio": 0.47482473080016707, |
| "flow/mag_ratio_mean": 0.724112270765653, |
| "flow/mag_ratio_std": 0.2020510737210104, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "eval_cos_loss": 0.2715048284427216, |
| "eval_loss": 0.8155939645691005, |
| "eval_mse_loss": 0.8155939645691005, |
| "eval_runtime": 37.7077, |
| "eval_samples_per_second": 742.367, |
| "eval_steps_per_second": 11.616, |
| "flow/cos_sim": 0.7284952074153238, |
| "flow/improvement_ratio": 0.47482473080016707, |
| "flow/mag_ratio_mean": 0.724112270765653, |
| "flow/mag_ratio_std": 0.2020510737210104, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.390189829569073, |
| "grad_norm": 0.2026418000459671, |
| "learning_rate": 0.0006908009537632514, |
| "loss": 0.8493704199790955, |
| "step": 8448 |
| }, |
| { |
| "epoch": 0.4020137637984389, |
| "grad_norm": 0.5044918656349182, |
| "learning_rate": 0.0006730838000114403, |
| "loss": 0.8506228923797607, |
| "step": 8704 |
| }, |
| { |
| "epoch": 0.41383769802780473, |
| "grad_norm": 0.2458736002445221, |
| "learning_rate": 0.0006551161421624341, |
| "loss": 0.8482614159584045, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "grad_norm": 0.7097583413124084, |
| "learning_rate": 0.0006369239847984517, |
| "loss": 0.846942126750946, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "eval_cos_loss": 0.27180098626575516, |
| "eval_loss": 0.8161548636003172, |
| "eval_mse_loss": 0.8161548636003172, |
| "flow/cos_sim": 0.7281990317993512, |
| "flow/improvement_ratio": 0.4732581986003815, |
| "flow/mag_ratio_mean": 0.7231416398803937, |
| "flow/mag_ratio_std": 0.201789186425405, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "eval_cos_loss": 0.27180098626575516, |
| "eval_loss": 0.8161548636003172, |
| "eval_mse_loss": 0.8161548636003172, |
| "eval_runtime": 37.5698, |
| "eval_samples_per_second": 745.094, |
| "eval_steps_per_second": 11.658, |
| "flow/cos_sim": 0.7281990317993512, |
| "flow/improvement_ratio": 0.4732581986003815, |
| "flow/mag_ratio_mean": 0.7231416398803937, |
| "flow/mag_ratio_std": 0.201789186425405, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4374855664865364, |
| "grad_norm": 0.3167099952697754, |
| "learning_rate": 0.0006185336574197479, |
| "loss": 0.8480910062789917, |
| "step": 9472 |
| }, |
| { |
| "epoch": 0.44930950071590225, |
| "grad_norm": 0.4810751974582672, |
| "learning_rate": 0.0005999717763379407, |
| "loss": 0.8465522527694702, |
| "step": 9728 |
| }, |
| { |
| "epoch": 0.4611334349452681, |
| "grad_norm": 0.2259974628686905, |
| "learning_rate": 0.0005812652061542363, |
| "loss": 0.844083309173584, |
| "step": 9984 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "grad_norm": 0.6505718231201172, |
| "learning_rate": 0.0005624410208783071, |
| "loss": 0.8436377644538879, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "eval_cos_loss": 0.2699208431047936, |
| "eval_loss": 0.808695926922097, |
| "eval_mse_loss": 0.808695926922097, |
| "flow/cos_sim": 0.7300791967677199, |
| "flow/improvement_ratio": 0.47688411740951886, |
| "flow/mag_ratio_mean": 0.7248269360359401, |
| "flow/mag_ratio_std": 0.20361059002544238, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "eval_cos_loss": 0.2699208431047936, |
| "eval_loss": 0.808695926922097, |
| "eval_mse_loss": 0.808695926922097, |
| "eval_runtime": 37.5891, |
| "eval_samples_per_second": 744.71, |
| "eval_steps_per_second": 11.652, |
| "flow/cos_sim": 0.7300791967677199, |
| "flow/improvement_ratio": 0.47688411740951886, |
| "flow/mag_ratio_mean": 0.7248269360359401, |
| "flow/mag_ratio_std": 0.20361059002544238, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.48478130340399983, |
| "grad_norm": 0.29194557666778564, |
| "learning_rate": 0.0005435264647440881, |
| "loss": 0.8419358730316162, |
| "step": 10496 |
| }, |
| { |
| "epoch": 0.49660523763336567, |
| "grad_norm": 0.20549535751342773, |
| "learning_rate": 0.000524548912779213, |
| "loss": 0.8400572538375854, |
| "step": 10752 |
| }, |
| { |
| "epoch": 0.5084291718627315, |
| "grad_norm": 0.7953479290008545, |
| "learning_rate": 0.0005055358311851499, |
| "loss": 0.8401579260826111, |
| "step": 11008 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "grad_norm": 0.14846356213092804, |
| "learning_rate": 0.0004865147375853812, |
| "loss": 0.840056836605072, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "eval_cos_loss": 0.2704421652642559, |
| "eval_loss": 0.8102181156476339, |
| "eval_mse_loss": 0.8102181156476339, |
| "flow/cos_sim": 0.7295578481399849, |
| "flow/improvement_ratio": 0.4786316101273445, |
| "flow/mag_ratio_mean": 0.730471750780872, |
| "flow/mag_ratio_std": 0.2050654717368078, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "eval_cos_loss": 0.2704421652642559, |
| "eval_loss": 0.8102181156476339, |
| "eval_mse_loss": 0.8102181156476339, |
| "eval_runtime": 37.702, |
| "eval_samples_per_second": 742.48, |
| "eval_steps_per_second": 11.617, |
| "flow/cos_sim": 0.7295578481399849, |
| "flow/improvement_ratio": 0.4786316101273445, |
| "flow/mag_ratio_mean": 0.730471750780872, |
| "flow/mag_ratio_std": 0.2050654717368078, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5320770403214632, |
| "grad_norm": 0.49196043610572815, |
| "learning_rate": 0.0004675131611991607, |
| "loss": 0.8376814723014832, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.5439009745508291, |
| "grad_norm": 0.3101634383201599, |
| "learning_rate": 0.0004485586029984899, |
| "loss": 0.8409138321876526, |
| "step": 11776 |
| }, |
| { |
| "epoch": 0.5557249087801949, |
| "grad_norm": 0.6860947608947754, |
| "learning_rate": 0.00042967849590597266, |
| "loss": 0.8401282429695129, |
| "step": 12032 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "grad_norm": 0.5976177453994751, |
| "learning_rate": 0.0004109001650911621, |
| "loss": 0.8376214504241943, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "eval_cos_loss": 0.2699217140742633, |
| "eval_loss": 0.8073501400479443, |
| "eval_mse_loss": 0.8073501400479443, |
| "flow/cos_sim": 0.7300783122239047, |
| "flow/improvement_ratio": 0.4786103154154129, |
| "flow/mag_ratio_mean": 0.7241527468102164, |
| "flow/mag_ratio_std": 0.20363660513946455, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "eval_cos_loss": 0.2699217140742633, |
| "eval_loss": 0.8073501400479443, |
| "eval_mse_loss": 0.8073501400479443, |
| "eval_runtime": 37.7433, |
| "eval_samples_per_second": 741.668, |
| "eval_steps_per_second": 11.605, |
| "flow/cos_sim": 0.7300783122239047, |
| "flow/improvement_ratio": 0.4786103154154129, |
| "flow/mag_ratio_mean": 0.7241527468102164, |
| "flow/mag_ratio_std": 0.20363660513946455, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5793727772389267, |
| "grad_norm": 0.19840270280838013, |
| "learning_rate": 0.0003922507884228551, |
| "loss": 0.8377624154090881, |
| "step": 12544 |
| }, |
| { |
| "epoch": 0.5911967114682924, |
| "grad_norm": 0.6053916215896606, |
| "learning_rate": 0.00037375735713457723, |
| "loss": 0.838446855545044, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.6030206456976583, |
| "grad_norm": 0.1777360439300537, |
| "learning_rate": 0.00035544663676018276, |
| "loss": 0.8392548561096191, |
| "step": 13056 |
| }, |
| { |
| "epoch": 0.6148445799270241, |
| "grad_norm": 0.2625690698623657, |
| "learning_rate": 0.00033734512839611255, |
| "loss": 0.8352001309394836, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.6148445799270241, |
| "eval_cos_loss": 0.2678832187568216, |
| "eval_loss": 0.8018131276516065, |
| "eval_mse_loss": 0.8018131276516065, |
| "flow/cos_sim": 0.732116795565984, |
| "flow/improvement_ratio": 0.47306563096231524, |
| "flow/mag_ratio_mean": 0.730011395259535, |
| "flow/mag_ratio_std": 0.20525332758007528, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.6148445799270241, |
| "eval_cos_loss": 0.2678832187568216, |
| "eval_loss": 0.8018131276516065, |
| "eval_mse_loss": 0.8018131276516065, |
| "eval_runtime": 37.6397, |
| "eval_samples_per_second": 743.71, |
| "eval_steps_per_second": 11.637, |
| "flow/cos_sim": 0.732116795565984, |
| "flow/improvement_ratio": 0.47306563096231524, |
| "flow/mag_ratio_mean": 0.730011395259535, |
| "flow/mag_ratio_std": 0.20525332758007528, |
| "step": 13312 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 21651, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|