| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 348, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 255.0, |
| "completions/max_terminated_length": 255.0, |
| "completions/mean_length": 79.640625, |
| "completions/mean_terminated_length": 79.640625, |
| "completions/min_length": 33.0, |
| "completions/min_terminated_length": 33.0, |
| "entropy": 0.5482596457004547, |
| "epoch": 0.0028735632183908046, |
| "frac_reward_zero_std": 0.34375, |
| "grad_norm": 0.004432788118720055, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "num_tokens": 442920.0, |
| "reward": 0.8734374046325684, |
| "reward_std": 0.13520026206970215, |
| "rewards/ngram_repetition2/mean": 0.0, |
| "rewards/ngram_repetition2/std": 0.0, |
| "rewards/ngram_repetition3/mean": -1.1421783710829914e-05, |
| "rewards/ngram_repetition3/std": 0.00025844547781161964, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.841796875, |
| "rewards/symbolic_reward_accuracy/std": 0.36528825759887695, |
| "rewards/symbolic_reward_partial_score/mean": 0.947265625, |
| "rewards/symbolic_reward_partial_score/std": 0.18198402225971222, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2040585279464722, |
| "sampling/importance_sampling_ratio/min": 0.007793497759848833, |
| "sampling/sampling_logp_difference/max": 4.854465484619141, |
| "sampling/sampling_logp_difference/mean": 0.27342236042022705, |
| "step": 1 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.09375, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1484375, |
| "entropy": 0.5987788438796997, |
| "epoch": 0.005747126436781609, |
| "grad_norm": 0.0029553379863500595, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 2 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.171875, |
| "clip_ratio/low_mean": 0.078125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.25, |
| "entropy": 0.5281001031398773, |
| "epoch": 0.008620689655172414, |
| "grad_norm": 0.0020463597029447556, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 3 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1875, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2421875, |
| "entropy": 0.6004053950309753, |
| "epoch": 0.011494252873563218, |
| "grad_norm": 0.002862541936337948, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 4 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 221.0, |
| "completions/max_terminated_length": 221.0, |
| "completions/mean_length": 81.396484375, |
| "completions/mean_terminated_length": 81.396484375, |
| "completions/min_length": 37.0, |
| "completions/min_terminated_length": 37.0, |
| "entropy": 0.6153657734394073, |
| "epoch": 0.014367816091954023, |
| "frac_reward_zero_std": 0.46875, |
| "grad_norm": 0.004800648894160986, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "num_tokens": 874227.0, |
| "reward": 0.831931471824646, |
| "reward_std": 0.10583364963531494, |
| "rewards/ngram_repetition2/mean": -0.00010850694525288418, |
| "rewards/ngram_repetition2/std": 0.001905819051899016, |
| "rewards/ngram_repetition3/mean": -0.00010186366125708446, |
| "rewards/ngram_repetition3/std": 0.0017140271374955773, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.787109375, |
| "rewards/symbolic_reward_accuracy/std": 0.409751296043396, |
| "rewards/symbolic_reward_partial_score/mean": 0.9365234375, |
| "rewards/symbolic_reward_partial_score/std": 0.17081154882907867, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2208325862884521, |
| "sampling/importance_sampling_ratio/min": 0.004927594680339098, |
| "sampling/sampling_logp_difference/max": 5.312904357910156, |
| "sampling/sampling_logp_difference/mean": 0.27739793062210083, |
| "step": 5 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1484375, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1796875, |
| "entropy": 0.6263215243816376, |
| "epoch": 0.017241379310344827, |
| "grad_norm": 0.001903409487567842, |
| "learning_rate": 1e-05, |
| "loss": -0.0005, |
| "step": 6 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.21875, |
| "entropy": 0.5939083099365234, |
| "epoch": 0.020114942528735632, |
| "grad_norm": 0.0020359489135444164, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 7 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1015625, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.171875, |
| "entropy": 0.5956400632858276, |
| "epoch": 0.022988505747126436, |
| "grad_norm": 0.002406664891168475, |
| "learning_rate": 1e-05, |
| "loss": 0.0006, |
| "step": 8 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 223.0, |
| "completions/max_terminated_length": 223.0, |
| "completions/mean_length": 88.787109375, |
| "completions/mean_terminated_length": 88.787109375, |
| "completions/min_length": 42.0, |
| "completions/min_terminated_length": 42.0, |
| "entropy": 0.6508974432945251, |
| "epoch": 0.02586206896551724, |
| "frac_reward_zero_std": 0.3125, |
| "grad_norm": 0.00618784548714757, |
| "learning_rate": 1e-05, |
| "loss": -0.0005, |
| "num_tokens": 1318982.0, |
| "reward": 0.8049274682998657, |
| "reward_std": 0.11365753412246704, |
| "rewards/ngram_repetition2/mean": -9.494357800576836e-05, |
| "rewards/ngram_repetition2/std": 0.0014728810638189316, |
| "rewards/ngram_repetition3/mean": -0.0003199847706127912, |
| "rewards/ngram_repetition3/std": 0.004649759270250797, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.759765625, |
| "rewards/symbolic_reward_accuracy/std": 0.4276435375213623, |
| "rewards/symbolic_reward_partial_score/mean": 0.9103189706802368, |
| "rewards/symbolic_reward_partial_score/std": 0.21451303362846375, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2356998920440674, |
| "sampling/importance_sampling_ratio/min": 0.002650650916621089, |
| "sampling/sampling_logp_difference/max": 5.932950019836426, |
| "sampling/sampling_logp_difference/mean": 0.3045163154602051, |
| "step": 9 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.265625, |
| "entropy": 0.6686981022357941, |
| "epoch": 0.028735632183908046, |
| "grad_norm": 0.002366194501519203, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 10 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1484375, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.234375, |
| "entropy": 0.6524442732334137, |
| "epoch": 0.031609195402298854, |
| "grad_norm": 0.004010654054582119, |
| "learning_rate": 1e-05, |
| "loss": 0.0005, |
| "step": 11 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1953125, |
| "clip_ratio/low_mean": 0.078125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2734375, |
| "entropy": 0.6432266533374786, |
| "epoch": 0.034482758620689655, |
| "grad_norm": 0.004558259155601263, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 12 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 277.0, |
| "completions/max_terminated_length": 277.0, |
| "completions/mean_length": 101.388671875, |
| "completions/mean_terminated_length": 101.388671875, |
| "completions/min_length": 42.0, |
| "completions/min_terminated_length": 42.0, |
| "entropy": 0.7190418243408203, |
| "epoch": 0.03735632183908046, |
| "frac_reward_zero_std": 0.15625, |
| "grad_norm": 0.010927229188382626, |
| "learning_rate": 1e-05, |
| "loss": 0.0009, |
| "num_tokens": 1792205.0, |
| "reward": 0.7988025546073914, |
| "reward_std": 0.1612505316734314, |
| "rewards/ngram_repetition2/mean": -0.0011130181374028325, |
| "rewards/ngram_repetition2/std": 0.013989781960844994, |
| "rewards/ngram_repetition3/mean": -0.0014404850080609322, |
| "rewards/ngram_repetition3/std": 0.013817409984767437, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.755859375, |
| "rewards/symbolic_reward_accuracy/std": 0.42999663949012756, |
| "rewards/symbolic_reward_partial_score/mean": 0.90234375, |
| "rewards/symbolic_reward_partial_score/std": 0.21886694431304932, |
| "rewards/tag_count_reward/mean": -0.009765625, |
| "rewards/tag_count_reward/std": 0.09843364357948303, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.267106056213379, |
| "sampling/importance_sampling_ratio/min": 0.0028255321085453033, |
| "sampling/sampling_logp_difference/max": 5.869058609008789, |
| "sampling/sampling_logp_difference/mean": 0.335277795791626, |
| "step": 13 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.09375, |
| "clip_ratio/low_mean": 0.1171875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2109375, |
| "entropy": 0.7461837530136108, |
| "epoch": 0.040229885057471264, |
| "grad_norm": 0.004749669693410397, |
| "learning_rate": 1e-05, |
| "loss": 0.0008, |
| "step": 14 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1328125, |
| "clip_ratio/low_mean": 0.1328125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.265625, |
| "entropy": 0.706035703420639, |
| "epoch": 0.04310344827586207, |
| "grad_norm": 0.0036822864785790443, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 15 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.1015625, |
| "clip_ratio/low_mean": 0.1015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.203125, |
| "entropy": 0.6989518105983734, |
| "epoch": 0.04597701149425287, |
| "grad_norm": 0.0051618898287415504, |
| "learning_rate": 1e-05, |
| "loss": -0.0005, |
| "step": 16 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 298.0, |
| "completions/max_terminated_length": 298.0, |
| "completions/mean_length": 102.732421875, |
| "completions/mean_terminated_length": 102.732421875, |
| "completions/min_length": 43.0, |
| "completions/min_terminated_length": 43.0, |
| "entropy": 0.7355360388755798, |
| "epoch": 0.04885057471264368, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.004863899666815996, |
| "learning_rate": 1e-05, |
| "loss": -0.0005, |
| "num_tokens": 2268804.0, |
| "reward": 0.7467195391654968, |
| "reward_std": 0.20338207483291626, |
| "rewards/ngram_repetition2/mean": -0.005328277125954628, |
| "rewards/ngram_repetition2/std": 0.021549751982092857, |
| "rewards/ngram_repetition3/mean": -0.005334243178367615, |
| "rewards/ngram_repetition3/std": 0.020070303231477737, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.703125, |
| "rewards/symbolic_reward_accuracy/std": 0.45732781291007996, |
| "rewards/symbolic_reward_partial_score/mean": 0.86767578125, |
| "rewards/symbolic_reward_partial_score/std": 0.28104299306869507, |
| "rewards/tag_count_reward/mean": -0.056640625, |
| "rewards/tag_count_reward/std": 0.23138070106506348, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2863795757293701, |
| "sampling/importance_sampling_ratio/min": 0.005591566674411297, |
| "sampling/sampling_logp_difference/max": 5.186495780944824, |
| "sampling/sampling_logp_difference/mean": 0.3546299636363983, |
| "step": 17 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.3203125, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.390625, |
| "entropy": 0.7249170541763306, |
| "epoch": 0.05172413793103448, |
| "grad_norm": 0.008876707404851913, |
| "learning_rate": 1e-05, |
| "loss": 0.0013, |
| "step": 18 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.3046875, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.390625, |
| "entropy": 0.7589452862739563, |
| "epoch": 0.05459770114942529, |
| "grad_norm": 0.006985923275351524, |
| "learning_rate": 1e-05, |
| "loss": 0.0017, |
| "step": 19 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1953125, |
| "clip_ratio/low_mean": 0.140625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3359375, |
| "entropy": 0.7788407206535339, |
| "epoch": 0.05747126436781609, |
| "grad_norm": 0.0038319623563438654, |
| "learning_rate": 1e-05, |
| "loss": 0.0011, |
| "step": 20 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 285.0, |
| "completions/max_terminated_length": 285.0, |
| "completions/mean_length": 107.83984375, |
| "completions/mean_terminated_length": 107.83984375, |
| "completions/min_length": 43.0, |
| "completions/min_terminated_length": 43.0, |
| "entropy": 0.735305517911911, |
| "epoch": 0.0603448275862069, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.004826927557587624, |
| "learning_rate": 1e-05, |
| "loss": -0.0006, |
| "num_tokens": 2732178.0, |
| "reward": 0.8216053247451782, |
| "reward_std": 0.14511626958847046, |
| "rewards/ngram_repetition2/mean": -0.00868980959057808, |
| "rewards/ngram_repetition2/std": 0.03146844357252121, |
| "rewards/ngram_repetition3/mean": -0.00836949236690998, |
| "rewards/ngram_repetition3/std": 0.029039273038506508, |
| "rewards/sentence_repetition/mean": -0.00014195645053405315, |
| "rewards/sentence_repetition/std": 0.0032121078111231327, |
| "rewards/symbolic_reward_accuracy/mean": 0.78125, |
| "rewards/symbolic_reward_accuracy/std": 0.41380295157432556, |
| "rewards/symbolic_reward_partial_score/mean": 0.9169921875, |
| "rewards/symbolic_reward_partial_score/std": 0.21696852147579193, |
| "rewards/tag_count_reward/mean": -0.001953125, |
| "rewards/tag_count_reward/std": 0.04419417306780815, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.287536382675171, |
| "sampling/importance_sampling_ratio/min": 0.0035747073125094175, |
| "sampling/sampling_logp_difference/max": 5.633872032165527, |
| "sampling/sampling_logp_difference/mean": 0.35755419731140137, |
| "step": 21 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1171875, |
| "clip_ratio/low_mean": 0.1171875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.234375, |
| "entropy": 0.7287033498287201, |
| "epoch": 0.06321839080459771, |
| "grad_norm": 0.008059649728238583, |
| "learning_rate": 1e-05, |
| "loss": 0.0013, |
| "step": 22 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1484375, |
| "clip_ratio/low_mean": 0.1484375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.296875, |
| "entropy": 0.7331990599632263, |
| "epoch": 0.06609195402298851, |
| "grad_norm": 0.0054933661594986916, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 23 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.171875, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.7370081543922424, |
| "epoch": 0.06896551724137931, |
| "grad_norm": 0.004683576058596373, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 24 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 248.0, |
| "completions/max_terminated_length": 248.0, |
| "completions/mean_length": 100.126953125, |
| "completions/mean_terminated_length": 100.126953125, |
| "completions/min_length": 43.0, |
| "completions/min_terminated_length": 43.0, |
| "entropy": 0.7627054750919342, |
| "epoch": 0.07183908045977011, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.007382780313491821, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "num_tokens": 3204435.0, |
| "reward": 0.8065550923347473, |
| "reward_std": 0.11376181244850159, |
| "rewards/ngram_repetition2/mean": -0.007007577456533909, |
| "rewards/ngram_repetition2/std": 0.028018856421113014, |
| "rewards/ngram_repetition3/mean": -0.006428225431591272, |
| "rewards/ngram_repetition3/std": 0.024606449529528618, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.767578125, |
| "rewards/symbolic_reward_accuracy/std": 0.42278963327407837, |
| "rewards/symbolic_reward_partial_score/mean": 0.8986002206802368, |
| "rewards/symbolic_reward_partial_score/std": 0.23486287891864777, |
| "rewards/tag_count_reward/mean": -0.001953125, |
| "rewards/tag_count_reward/std": 0.04419417306780815, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2815171480178833, |
| "sampling/importance_sampling_ratio/min": 0.003166247857734561, |
| "sampling/sampling_logp_difference/max": 5.7552080154418945, |
| "sampling/sampling_logp_difference/mean": 0.34828951954841614, |
| "step": 25 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.328125, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.359375, |
| "entropy": 0.733154833316803, |
| "epoch": 0.07471264367816093, |
| "grad_norm": 0.010408902540802956, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 26 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.234375, |
| "clip_ratio/low_mean": 0.140625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.375, |
| "entropy": 0.7431564331054688, |
| "epoch": 0.07758620689655173, |
| "grad_norm": 0.006061589810997248, |
| "learning_rate": 1e-05, |
| "loss": 0.0017, |
| "step": 27 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2578125, |
| "clip_ratio/low_mean": 0.09375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3515625, |
| "entropy": 0.6956618130207062, |
| "epoch": 0.08045977011494253, |
| "grad_norm": 0.004011357668787241, |
| "learning_rate": 1e-05, |
| "loss": 0.0005, |
| "step": 28 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.001953125, |
| "completions/max_length": 16384.0, |
| "completions/max_terminated_length": 200.0, |
| "completions/mean_length": 111.146484375, |
| "completions/mean_terminated_length": 79.3013687133789, |
| "completions/min_length": 42.0, |
| "completions/min_terminated_length": 42.0, |
| "entropy": 0.6434568762779236, |
| "epoch": 0.08333333333333333, |
| "frac_reward_zero_std": 0.125, |
| "grad_norm": 0.006815092638134956, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "num_tokens": 3704190.0, |
| "reward": 0.7337323427200317, |
| "reward_std": 0.15399469435214996, |
| "rewards/ngram_repetition2/mean": -0.004595820792019367, |
| "rewards/ngram_repetition2/std": 0.04047611355781555, |
| "rewards/ngram_repetition3/mean": -0.0048026395961642265, |
| "rewards/ngram_repetition3/std": 0.04022197425365448, |
| "rewards/sentence_repetition/mean": -0.001153680495917797, |
| "rewards/sentence_repetition/std": 0.02610480971634388, |
| "rewards/symbolic_reward_accuracy/mean": 0.677734375, |
| "rewards/symbolic_reward_accuracy/std": 0.46780112385749817, |
| "rewards/symbolic_reward_partial_score/mean": 0.86474609375, |
| "rewards/symbolic_reward_partial_score/std": 0.22939814627170563, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2359492778778076, |
| "sampling/importance_sampling_ratio/min": 0.005792928393930197, |
| "sampling/sampling_logp_difference/max": 5.151117324829102, |
| "sampling/sampling_logp_difference/mean": 0.2986485958099365, |
| "step": 29 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2421875, |
| "clip_ratio/low_mean": 0.1328125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.375, |
| "entropy": 0.6564987599849701, |
| "epoch": 0.08620689655172414, |
| "grad_norm": 0.006035366095602512, |
| "learning_rate": 1e-05, |
| "loss": 0.0294, |
| "step": 30 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.140625, |
| "clip_ratio/low_mean": 0.1484375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2890625, |
| "entropy": 0.6078056395053864, |
| "epoch": 0.08908045977011494, |
| "grad_norm": 0.005147899966686964, |
| "learning_rate": 1e-05, |
| "loss": 0.0007, |
| "step": 31 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1171875, |
| "clip_ratio/low_mean": 0.171875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2890625, |
| "entropy": 0.6026411354541779, |
| "epoch": 0.09195402298850575, |
| "grad_norm": 0.0064353933557868, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 32 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 245.0, |
| "completions/max_terminated_length": 245.0, |
| "completions/mean_length": 73.185546875, |
| "completions/mean_terminated_length": 73.185546875, |
| "completions/min_length": 39.0, |
| "completions/min_terminated_length": 39.0, |
| "entropy": 0.6158567667007446, |
| "epoch": 0.09482758620689655, |
| "frac_reward_zero_std": 0.1875, |
| "grad_norm": 0.005984546151012182, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "num_tokens": 4162653.0, |
| "reward": 0.7928154468536377, |
| "reward_std": 0.13436806201934814, |
| "rewards/ngram_repetition2/mean": -0.002782913390547037, |
| "rewards/ngram_repetition2/std": 0.01602020114660263, |
| "rewards/ngram_repetition3/mean": -0.0027794050984084606, |
| "rewards/ngram_repetition3/std": 0.015124209225177765, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.7421875, |
| "rewards/symbolic_reward_accuracy/std": 0.43785804510116577, |
| "rewards/symbolic_reward_partial_score/mean": 0.9111328125, |
| "rewards/symbolic_reward_partial_score/std": 0.19450142979621887, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2359068393707275, |
| "sampling/importance_sampling_ratio/min": 0.005337143782526255, |
| "sampling/sampling_logp_difference/max": 5.233064651489258, |
| "sampling/sampling_logp_difference/mean": 0.2945653796195984, |
| "step": 33 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2421875, |
| "clip_ratio/low_mean": 0.015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.5790786743164062, |
| "epoch": 0.09770114942528736, |
| "grad_norm": 0.004169682040810585, |
| "learning_rate": 1e-05, |
| "loss": 0.0007, |
| "step": 34 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1875, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2734375, |
| "entropy": 0.5978860259056091, |
| "epoch": 0.10057471264367816, |
| "grad_norm": 0.0042377435602247715, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 35 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1484375, |
| "clip_ratio/low_mean": 0.1015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.25, |
| "entropy": 0.6040017604827881, |
| "epoch": 0.10344827586206896, |
| "grad_norm": 0.003600814612582326, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 36 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 174.0, |
| "completions/max_terminated_length": 174.0, |
| "completions/mean_length": 63.58984375, |
| "completions/mean_terminated_length": 63.58984375, |
| "completions/min_length": 40.0, |
| "completions/min_terminated_length": 40.0, |
| "entropy": 0.5243661403656006, |
| "epoch": 0.10632183908045977, |
| "frac_reward_zero_std": 0.21875, |
| "grad_norm": 0.0034763177391141653, |
| "learning_rate": 1e-05, |
| "loss": 0.0005, |
| "num_tokens": 4622699.0, |
| "reward": 0.8271001577377319, |
| "reward_std": 0.09751666337251663, |
| "rewards/ngram_repetition2/mean": -0.0021677776239812374, |
| "rewards/ngram_repetition2/std": 0.014099263586103916, |
| "rewards/ngram_repetition3/mean": -0.0023514076601713896, |
| "rewards/ngram_repetition3/std": 0.015108847990632057, |
| "rewards/sentence_repetition/mean": -0.00030838814564049244, |
| "rewards/sentence_repetition/std": 0.006978027056902647, |
| "rewards/symbolic_reward_accuracy/mean": 0.791015625, |
| "rewards/symbolic_reward_accuracy/std": 0.40698084235191345, |
| "rewards/symbolic_reward_partial_score/mean": 0.912109375, |
| "rewards/symbolic_reward_partial_score/std": 0.21082071959972382, |
| "rewards/tag_count_reward/mean": -0.001953125, |
| "rewards/tag_count_reward/std": 0.04419417306780815, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2076072692871094, |
| "sampling/importance_sampling_ratio/min": 0.0033999995794147253, |
| "sampling/sampling_logp_difference/max": 5.6839799880981445, |
| "sampling/sampling_logp_difference/mean": 0.26283755898475647, |
| "step": 37 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1953125, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.25, |
| "entropy": 0.49053676426410675, |
| "epoch": 0.10919540229885058, |
| "grad_norm": 0.002470890525728464, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 38 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.203125, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2421875, |
| "entropy": 0.4919590651988983, |
| "epoch": 0.11206896551724138, |
| "grad_norm": 0.0052912612445652485, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 39 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2265625, |
| "clip_ratio/low_mean": 0.1171875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.34375, |
| "entropy": 0.5164909660816193, |
| "epoch": 0.11494252873563218, |
| "grad_norm": 0.002411817666143179, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 40 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 183.0, |
| "completions/max_terminated_length": 183.0, |
| "completions/mean_length": 60.0703125, |
| "completions/mean_terminated_length": 60.0703125, |
| "completions/min_length": 41.0, |
| "completions/min_terminated_length": 41.0, |
| "entropy": 0.4661720544099808, |
| "epoch": 0.11781609195402298, |
| "frac_reward_zero_std": 0.28125, |
| "grad_norm": 0.0034640042576938868, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "num_tokens": 5025199.0, |
| "reward": 0.9066269397735596, |
| "reward_std": 0.12120135873556137, |
| "rewards/ngram_repetition2/mean": -0.0005607319180853665, |
| "rewards/ngram_repetition2/std": 0.00634410185739398, |
| "rewards/ngram_repetition3/mean": -0.0008070581243373454, |
| "rewards/ngram_repetition3/std": 0.007949295453727245, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.884765625, |
| "rewards/symbolic_reward_accuracy/std": 0.3196168541908264, |
| "rewards/symbolic_reward_partial_score/mean": 0.9576822519302368, |
| "rewards/symbolic_reward_partial_score/std": 0.17431758344173431, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1969958543777466, |
| "sampling/importance_sampling_ratio/min": 0.005194148980081081, |
| "sampling/sampling_logp_difference/max": 5.260222434997559, |
| "sampling/sampling_logp_difference/mean": 0.23839689791202545, |
| "step": 41 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.2265625, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2734375, |
| "entropy": 0.42921097576618195, |
| "epoch": 0.1206896551724138, |
| "grad_norm": 0.0021978251170367002, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 42 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1640625, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1953125, |
| "entropy": 0.5135487914085388, |
| "epoch": 0.1235632183908046, |
| "grad_norm": 0.004084162879735231, |
| "learning_rate": 1e-05, |
| "loss": 0.0008, |
| "step": 43 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.1171875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2421875, |
| "entropy": 0.43603725731372833, |
| "epoch": 0.12643678160919541, |
| "grad_norm": 0.0016240986296907067, |
| "learning_rate": 1e-05, |
| "loss": 0.0005, |
| "step": 44 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.001953125, |
| "completions/max_length": 16384.0, |
| "completions/max_terminated_length": 166.0, |
| "completions/mean_length": 91.220703125, |
| "completions/mean_terminated_length": 59.33659362792969, |
| "completions/min_length": 38.0, |
| "completions/min_terminated_length": 38.0, |
| "entropy": 0.4012712836265564, |
| "epoch": 0.12931034482758622, |
| "frac_reward_zero_std": 0.3125, |
| "grad_norm": 0.0030578728765249252, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "num_tokens": 5486880.0, |
| "reward": 0.6342004537582397, |
| "reward_std": 0.12287883460521698, |
| "rewards/ngram_repetition2/mean": -0.0027574566192924976, |
| "rewards/ngram_repetition2/std": 0.04059094563126564, |
| "rewards/ngram_repetition3/mean": -0.002955373842269182, |
| "rewards/ngram_repetition3/std": 0.04163341596722603, |
| "rewards/sentence_repetition/mean": -0.001980098430067301, |
| "rewards/sentence_repetition/std": 0.03845130279660225, |
| "rewards/symbolic_reward_accuracy/mean": 0.564453125, |
| "rewards/symbolic_reward_accuracy/std": 0.49631330370903015, |
| "rewards/symbolic_reward_partial_score/mean": 0.7985026240348816, |
| "rewards/symbolic_reward_partial_score/std": 0.2610504925251007, |
| "rewards/tag_count_reward/mean": -0.00390625, |
| "rewards/tag_count_reward/std": 0.06243881583213806, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.162497878074646, |
| "sampling/importance_sampling_ratio/min": 0.003198559395968914, |
| "sampling/sampling_logp_difference/max": 5.745054721832275, |
| "sampling/sampling_logp_difference/mean": 0.20148907601833344, |
| "step": 45 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1171875, |
| "clip_ratio/low_mean": 0.125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2421875, |
| "entropy": 0.39941859245300293, |
| "epoch": 0.13218390804597702, |
| "grad_norm": 0.0021550978999584913, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "step": 46 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.1015625, |
| "clip_ratio/low_mean": 0.109375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2109375, |
| "entropy": 0.39996063709259033, |
| "epoch": 0.13505747126436782, |
| "grad_norm": 0.00215929769910872, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 47 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.25, |
| "entropy": 0.3763733506202698, |
| "epoch": 0.13793103448275862, |
| "grad_norm": 0.003114216960966587, |
| "learning_rate": 1e-05, |
| "loss": 0.0102, |
| "step": 48 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 4079.0, |
| "completions/max_terminated_length": 4079.0, |
| "completions/mean_length": 65.365234375, |
| "completions/mean_terminated_length": 65.365234375, |
| "completions/min_length": 37.0, |
| "completions/min_terminated_length": 37.0, |
| "entropy": 0.4029734432697296, |
| "epoch": 0.14080459770114942, |
| "frac_reward_zero_std": 0.34375, |
| "grad_norm": 0.004060815088450909, |
| "learning_rate": 1e-05, |
| "loss": 0.0071, |
| "num_tokens": 5925819.0, |
| "reward": 0.790141224861145, |
| "reward_std": 0.09819010645151138, |
| "rewards/ngram_repetition2/mean": -0.002248897682875395, |
| "rewards/ngram_repetition2/std": 0.042518578469753265, |
| "rewards/ngram_repetition3/mean": -0.0021793104242533445, |
| "rewards/ngram_repetition3/std": 0.04257462918758392, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.73828125, |
| "rewards/symbolic_reward_accuracy/std": 0.44000017642974854, |
| "rewards/symbolic_reward_partial_score/mean": 0.9112955331802368, |
| "rewards/symbolic_reward_partial_score/std": 0.1929275393486023, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1678524017333984, |
| "sampling/importance_sampling_ratio/min": 0.008369643241167068, |
| "sampling/sampling_logp_difference/max": 4.783143997192383, |
| "sampling/sampling_logp_difference/mean": 0.21218247711658478, |
| "step": 49 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1015625, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.171875, |
| "entropy": 0.41800519824028015, |
| "epoch": 0.14367816091954022, |
| "grad_norm": 0.001978965476155281, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 50 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.109375, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.171875, |
| "entropy": 0.39291223883628845, |
| "epoch": 0.14655172413793102, |
| "grad_norm": 0.0015093119582161307, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 51 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.15625, |
| "clip_ratio/low_mean": 0.09375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.25, |
| "entropy": 0.4259650707244873, |
| "epoch": 0.14942528735632185, |
| "grad_norm": 0.0014180849539116025, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 52 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.001953125, |
| "completions/max_length": 16384.0, |
| "completions/max_terminated_length": 143.0, |
| "completions/mean_length": 90.54296875, |
| "completions/mean_terminated_length": 58.657535552978516, |
| "completions/min_length": 39.0, |
| "completions/min_terminated_length": 39.0, |
| "entropy": 0.4426834136247635, |
| "epoch": 0.15229885057471265, |
| "frac_reward_zero_std": 0.34375, |
| "grad_norm": 0.004828969016671181, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "num_tokens": 6408689.0, |
| "reward": 0.7719333171844482, |
| "reward_std": 0.18412092328071594, |
| "rewards/ngram_repetition2/mean": -0.0019203309202566743, |
| "rewards/ngram_repetition2/std": 0.04211832955479622, |
| "rewards/ngram_repetition3/mean": -0.00201208028011024, |
| "rewards/ngram_repetition3/std": 0.04237477108836174, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.724609375, |
| "rewards/symbolic_reward_accuracy/std": 0.44714778661727905, |
| "rewards/symbolic_reward_partial_score/mean": 0.8824869394302368, |
| "rewards/symbolic_reward_partial_score/std": 0.23320595920085907, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1707079410552979, |
| "sampling/importance_sampling_ratio/min": 0.005764464382082224, |
| "sampling/sampling_logp_difference/max": 5.15604305267334, |
| "sampling/sampling_logp_difference/mean": 0.21329498291015625, |
| "step": 53 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.15625, |
| "clip_ratio/low_mean": 0.1171875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2734375, |
| "entropy": 0.43557606637477875, |
| "epoch": 0.15517241379310345, |
| "grad_norm": 0.002126704901456833, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 54 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.15625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.28125, |
| "entropy": 0.4388166666030884, |
| "epoch": 0.15804597701149425, |
| "grad_norm": 0.0041025117971003056, |
| "learning_rate": 1e-05, |
| "loss": 0.0165, |
| "step": 55 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.171875, |
| "clip_ratio/low_mean": 0.125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.296875, |
| "entropy": 0.40535806119441986, |
| "epoch": 0.16091954022988506, |
| "grad_norm": 0.0017564742593094707, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 56 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 175.0, |
| "completions/max_terminated_length": 175.0, |
| "completions/mean_length": 57.52734375, |
| "completions/mean_terminated_length": 57.52734375, |
| "completions/min_length": 38.0, |
| "completions/min_terminated_length": 38.0, |
| "entropy": 0.4543343782424927, |
| "epoch": 0.16379310344827586, |
| "frac_reward_zero_std": 0.3125, |
| "grad_norm": 0.0039548370987176895, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "num_tokens": 6834111.0, |
| "reward": 0.8162930011749268, |
| "reward_std": 0.15883183479309082, |
| "rewards/ngram_repetition2/mean": -0.0005367818521335721, |
| "rewards/ngram_repetition2/std": 0.009957203641533852, |
| "rewards/ngram_repetition3/mean": -0.0006075998535379767, |
| "rewards/ngram_repetition3/std": 0.01245367806404829, |
| "rewards/sentence_repetition/mean": -0.0004145588318351656, |
| "rewards/sentence_repetition/std": 0.009304200299084187, |
| "rewards/symbolic_reward_accuracy/mean": 0.7734375, |
| "rewards/symbolic_reward_accuracy/std": 0.4190165400505066, |
| "rewards/symbolic_reward_partial_score/mean": 0.9176431894302368, |
| "rewards/symbolic_reward_partial_score/std": 0.2047864943742752, |
| "rewards/tag_count_reward/mean": -0.00390625, |
| "rewards/tag_count_reward/std": 0.06243881583213806, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1725012063980103, |
| "sampling/importance_sampling_ratio/min": 0.008290642872452736, |
| "sampling/sampling_logp_difference/max": 4.792627811431885, |
| "sampling/sampling_logp_difference/mean": 0.22002115845680237, |
| "step": 57 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.078125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.44487476348876953, |
| "epoch": 0.16666666666666666, |
| "grad_norm": 0.0029834641609340906, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 58 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1171875, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.203125, |
| "entropy": 0.4447477012872696, |
| "epoch": 0.16954022988505746, |
| "grad_norm": 0.0029372554272413254, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "step": 59 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.171875, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2421875, |
| "entropy": 0.4102463573217392, |
| "epoch": 0.1724137931034483, |
| "grad_norm": 0.0020892529282718897, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 60 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 114.0, |
| "completions/max_terminated_length": 114.0, |
| "completions/mean_length": 54.142578125, |
| "completions/mean_terminated_length": 54.142578125, |
| "completions/min_length": 39.0, |
| "completions/min_terminated_length": 39.0, |
| "entropy": 0.4056634455919266, |
| "epoch": 0.1752873563218391, |
| "frac_reward_zero_std": 0.4375, |
| "grad_norm": 0.0013282729778438807, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "num_tokens": 7295496.0, |
| "reward": 0.7713481187820435, |
| "reward_std": 0.08887413889169693, |
| "rewards/ngram_repetition2/mean": -0.0016605097334831953, |
| "rewards/ngram_repetition2/std": 0.0266667939722538, |
| "rewards/ngram_repetition3/mean": -0.0018864045850932598, |
| "rewards/ngram_repetition3/std": 0.028560085222125053, |
| "rewards/sentence_repetition/mean": -0.00030838814564049244, |
| "rewards/sentence_repetition/std": 0.006978027056902647, |
| "rewards/symbolic_reward_accuracy/mean": 0.72265625, |
| "rewards/symbolic_reward_accuracy/std": 0.4481254518032074, |
| "rewards/symbolic_reward_partial_score/mean": 0.8857421875, |
| "rewards/symbolic_reward_partial_score/std": 0.2284853160381317, |
| "rewards/tag_count_reward/mean": -0.001953125, |
| "rewards/tag_count_reward/std": 0.04419417306780815, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1588588953018188, |
| "sampling/importance_sampling_ratio/min": 0.0037964945659041405, |
| "sampling/sampling_logp_difference/max": 5.573677062988281, |
| "sampling/sampling_logp_difference/mean": 0.20629723370075226, |
| "step": 61 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.09375, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.125, |
| "entropy": 0.40721358358860016, |
| "epoch": 0.1781609195402299, |
| "grad_norm": 0.002758313436061144, |
| "learning_rate": 1e-05, |
| "loss": 0.0006, |
| "step": 62 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1875, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.234375, |
| "entropy": 0.38856589794158936, |
| "epoch": 0.1810344827586207, |
| "grad_norm": 0.0010847699595615268, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 63 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.0859375, |
| "clip_ratio/low_mean": 0.078125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1640625, |
| "entropy": 0.40965311229228973, |
| "epoch": 0.1839080459770115, |
| "grad_norm": 0.0023630608338862658, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 64 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 107.0, |
| "completions/max_terminated_length": 107.0, |
| "completions/mean_length": 56.3203125, |
| "completions/mean_terminated_length": 56.3203125, |
| "completions/min_length": 37.0, |
| "completions/min_terminated_length": 37.0, |
| "entropy": 0.4272766709327698, |
| "epoch": 0.1867816091954023, |
| "frac_reward_zero_std": 0.28125, |
| "grad_norm": 0.0028679470997303724, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "num_tokens": 7754348.0, |
| "reward": 0.7936002016067505, |
| "reward_std": 0.130602166056633, |
| "rewards/ngram_repetition2/mean": -0.0001220703125, |
| "rewards/ngram_repetition2/std": 0.0027621358167380095, |
| "rewards/ngram_repetition3/mean": -0.0002109008200932294, |
| "rewards/ngram_repetition3/std": 0.004772140644490719, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.755859375, |
| "rewards/symbolic_reward_accuracy/std": 0.42999663949012756, |
| "rewards/symbolic_reward_partial_score/mean": 0.8816731572151184, |
| "rewards/symbolic_reward_partial_score/std": 0.2536083459854126, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1688048839569092, |
| "sampling/importance_sampling_ratio/min": 0.00808299146592617, |
| "sampling/sampling_logp_difference/max": 4.8179931640625, |
| "sampling/sampling_logp_difference/mean": 0.21086427569389343, |
| "step": 65 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.203125, |
| "clip_ratio/low_mean": 0.09375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.296875, |
| "entropy": 0.42733363807201385, |
| "epoch": 0.1896551724137931, |
| "grad_norm": 0.0032221204601228237, |
| "learning_rate": 1e-05, |
| "loss": 0.0005, |
| "step": 66 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2421875, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.296875, |
| "entropy": 0.41568654775619507, |
| "epoch": 0.1925287356321839, |
| "grad_norm": 0.002042518462985754, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 67 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.21875, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2734375, |
| "entropy": 0.42494598031044006, |
| "epoch": 0.19540229885057472, |
| "grad_norm": 0.001341567374765873, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 68 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 118.0, |
| "completions/max_terminated_length": 118.0, |
| "completions/mean_length": 57.2265625, |
| "completions/mean_terminated_length": 57.2265625, |
| "completions/min_length": 37.0, |
| "completions/min_terminated_length": 37.0, |
| "entropy": 0.48594728112220764, |
| "epoch": 0.19827586206896552, |
| "frac_reward_zero_std": 0.4375, |
| "grad_norm": 0.00340810464695096, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "num_tokens": 8192288.0, |
| "reward": 0.7913572788238525, |
| "reward_std": 0.1352584958076477, |
| "rewards/ngram_repetition2/mean": 0.0, |
| "rewards/ngram_repetition2/std": 0.0, |
| "rewards/ngram_repetition3/mean": -1.1421783710829914e-05, |
| "rewards/ngram_repetition3/std": 0.00025844547781161964, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.7421875, |
| "rewards/symbolic_reward_accuracy/std": 0.43785804510116577, |
| "rewards/symbolic_reward_partial_score/mean": 0.9060872793197632, |
| "rewards/symbolic_reward_partial_score/std": 0.20613642036914825, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1853888034820557, |
| "sampling/importance_sampling_ratio/min": 0.003909863531589508, |
| "sampling/sampling_logp_difference/max": 5.544252872467041, |
| "sampling/sampling_logp_difference/mean": 0.23184895515441895, |
| "step": 69 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.15625, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1875, |
| "entropy": 0.46007974445819855, |
| "epoch": 0.20114942528735633, |
| "grad_norm": 0.0018378323875367641, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 70 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1875, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.449898436665535, |
| "epoch": 0.20402298850574713, |
| "grad_norm": 0.0010113210882991552, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 71 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.109375, |
| "clip_ratio/low_mean": 0.09375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.203125, |
| "entropy": 0.4371645599603653, |
| "epoch": 0.20689655172413793, |
| "grad_norm": 0.0015104720368981361, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 72 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 315.0, |
| "completions/max_terminated_length": 315.0, |
| "completions/mean_length": 58.287109375, |
| "completions/mean_terminated_length": 58.287109375, |
| "completions/min_length": 38.0, |
| "completions/min_terminated_length": 38.0, |
| "entropy": 0.46672871708869934, |
| "epoch": 0.20977011494252873, |
| "frac_reward_zero_std": 0.4375, |
| "grad_norm": 0.0020025873091071844, |
| "learning_rate": 1e-05, |
| "loss": -0.0005, |
| "num_tokens": 8631251.0, |
| "reward": 0.8614984750747681, |
| "reward_std": 0.11169316619634628, |
| "rewards/ngram_repetition2/mean": -0.0012671099975705147, |
| "rewards/ngram_repetition2/std": 0.028671424835920334, |
| "rewards/ngram_repetition3/mean": -0.0012333698105067015, |
| "rewards/ngram_repetition3/std": 0.02623102255165577, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.826171875, |
| "rewards/symbolic_reward_accuracy/std": 0.3793322443962097, |
| "rewards/symbolic_reward_partial_score/mean": 0.9440103769302368, |
| "rewards/symbolic_reward_partial_score/std": 0.18596942722797394, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1941895484924316, |
| "sampling/importance_sampling_ratio/min": 0.002809051424264908, |
| "sampling/sampling_logp_difference/max": 5.874908447265625, |
| "sampling/sampling_logp_difference/mean": 0.24140113592147827, |
| "step": 73 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1953125, |
| "entropy": 0.4615946561098099, |
| "epoch": 0.21264367816091953, |
| "grad_norm": 0.0036014795769006014, |
| "learning_rate": 1e-05, |
| "loss": 0.0005, |
| "step": 74 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1328125, |
| "clip_ratio/low_mean": 0.0234375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.15625, |
| "entropy": 0.50718954205513, |
| "epoch": 0.21551724137931033, |
| "grad_norm": 0.002646596170961857, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 75 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.1015625, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.15625, |
| "entropy": 0.464016318321228, |
| "epoch": 0.21839080459770116, |
| "grad_norm": 0.002083716681227088, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 76 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 140.0, |
| "completions/max_terminated_length": 140.0, |
| "completions/mean_length": 58.1015625, |
| "completions/mean_terminated_length": 58.1015625, |
| "completions/min_length": 40.0, |
| "completions/min_terminated_length": 40.0, |
| "entropy": 0.45281049609184265, |
| "epoch": 0.22126436781609196, |
| "frac_reward_zero_std": 0.28125, |
| "grad_norm": 0.0050986045971512794, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "num_tokens": 9085159.0, |
| "reward": 0.851704478263855, |
| "reward_std": 0.1377110630273819, |
| "rewards/ngram_repetition2/mean": -0.0002638691512402147, |
| "rewards/ngram_repetition2/std": 0.0059706768952310085, |
| "rewards/ngram_repetition3/mean": -0.0001890077255666256, |
| "rewards/ngram_repetition3/std": 0.003657597815617919, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.818359375, |
| "rewards/symbolic_reward_accuracy/std": 0.38592514395713806, |
| "rewards/symbolic_reward_partial_score/mean": 0.9295247197151184, |
| "rewards/symbolic_reward_partial_score/std": 0.20194244384765625, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1843516826629639, |
| "sampling/importance_sampling_ratio/min": 0.007268482819199562, |
| "sampling/sampling_logp_difference/max": 4.92420768737793, |
| "sampling/sampling_logp_difference/mean": 0.22934292256832123, |
| "step": 77 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1328125, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1796875, |
| "entropy": 0.45783525705337524, |
| "epoch": 0.22413793103448276, |
| "grad_norm": 0.0012645251117646694, |
| "learning_rate": 1e-05, |
| "loss": -0.0004, |
| "step": 78 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1484375, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.234375, |
| "entropy": 0.4066694378852844, |
| "epoch": 0.22701149425287356, |
| "grad_norm": 0.0011590078938752413, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 79 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.1640625, |
| "clip_ratio/low_mean": 0.1015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.265625, |
| "entropy": 0.44288161396980286, |
| "epoch": 0.22988505747126436, |
| "grad_norm": 0.0016866448568180203, |
| "learning_rate": 1e-05, |
| "loss": 0.0005, |
| "step": 80 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 121.0, |
| "completions/max_terminated_length": 121.0, |
| "completions/mean_length": 58.046875, |
| "completions/mean_terminated_length": 58.046875, |
| "completions/min_length": 39.0, |
| "completions/min_terminated_length": 39.0, |
| "entropy": 0.49261198937892914, |
| "epoch": 0.23275862068965517, |
| "frac_reward_zero_std": 0.40625, |
| "grad_norm": 0.0027653626166284084, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "num_tokens": 9526847.0, |
| "reward": 0.8708001375198364, |
| "reward_std": 0.13339784741401672, |
| "rewards/ngram_repetition2/mean": -2.26501560973702e-05, |
| "rewards/ngram_repetition2/std": 0.00040280655957758427, |
| "rewards/ngram_repetition3/mean": -4.4448628614190966e-05, |
| "rewards/ngram_repetition3/std": 0.0010057577164843678, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.845703125, |
| "rewards/symbolic_reward_accuracy/std": 0.36158639192581177, |
| "rewards/symbolic_reward_partial_score/mean": 0.9293619394302368, |
| "rewards/symbolic_reward_partial_score/std": 0.23605132102966309, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1911598443984985, |
| "sampling/importance_sampling_ratio/min": 0.00648106262087822, |
| "sampling/sampling_logp_difference/max": 5.038870811462402, |
| "sampling/sampling_logp_difference/mean": 0.23612119257450104, |
| "step": 81 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.15625, |
| "clip_ratio/low_mean": 0.015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.171875, |
| "entropy": 0.441509410738945, |
| "epoch": 0.23563218390804597, |
| "grad_norm": 0.004880265332758427, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 82 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1875, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2421875, |
| "entropy": 0.4431619942188263, |
| "epoch": 0.23850574712643677, |
| "grad_norm": 0.0035018131602555513, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 83 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1015625, |
| "clip_ratio/low_mean": 0.109375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2109375, |
| "entropy": 0.4725227504968643, |
| "epoch": 0.2413793103448276, |
| "grad_norm": 0.0032173239160329103, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "step": 84 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 132.0, |
| "completions/max_terminated_length": 132.0, |
| "completions/mean_length": 56.25390625, |
| "completions/mean_terminated_length": 56.25390625, |
| "completions/min_length": 40.0, |
| "completions/min_terminated_length": 40.0, |
| "entropy": 0.4439745992422104, |
| "epoch": 0.2442528735632184, |
| "frac_reward_zero_std": 0.4375, |
| "grad_norm": 0.0022925150115042925, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "num_tokens": 9979969.0, |
| "reward": 0.7895450592041016, |
| "reward_std": 0.11284206807613373, |
| "rewards/ngram_repetition2/mean": -0.0002489655453246087, |
| "rewards/ngram_repetition2/std": 0.005007500294595957, |
| "rewards/ngram_repetition3/mean": -0.00032552084303461015, |
| "rewards/ngram_repetition3/std": 0.007365696132183075, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.73046875, |
| "rewards/symbolic_reward_accuracy/std": 0.44415023922920227, |
| "rewards/symbolic_reward_partial_score/mean": 0.9274088144302368, |
| "rewards/symbolic_reward_partial_score/std": 0.17746692895889282, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.18569016456604, |
| "sampling/importance_sampling_ratio/min": 0.009709770791232586, |
| "sampling/sampling_logp_difference/max": 4.634622573852539, |
| "sampling/sampling_logp_difference/mean": 0.23130828142166138, |
| "step": 85 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.15625, |
| "entropy": 0.45062993466854095, |
| "epoch": 0.2471264367816092, |
| "grad_norm": 0.002243473893031478, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 86 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.15625, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2109375, |
| "entropy": 0.48541413247585297, |
| "epoch": 0.25, |
| "grad_norm": 0.002420612843707204, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 87 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1171875, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1875, |
| "entropy": 0.4482133090496063, |
| "epoch": 0.25287356321839083, |
| "grad_norm": 0.0013555807527154684, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 88 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 126.0, |
| "completions/max_terminated_length": 126.0, |
| "completions/mean_length": 55.689453125, |
| "completions/mean_terminated_length": 55.689453125, |
| "completions/min_length": 37.0, |
| "completions/min_terminated_length": 37.0, |
| "entropy": 0.40743130445480347, |
| "epoch": 0.2557471264367816, |
| "frac_reward_zero_std": 0.375, |
| "grad_norm": 0.004098931793123484, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "num_tokens": 10420770.0, |
| "reward": 0.64892578125, |
| "reward_std": 0.11810654401779175, |
| "rewards/ngram_repetition2/mean": 0.0, |
| "rewards/ngram_repetition2/std": 0.0, |
| "rewards/ngram_repetition3/mean": 0.0, |
| "rewards/ngram_repetition3/std": 0.0, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.5703125, |
| "rewards/symbolic_reward_accuracy/std": 0.4955156147480011, |
| "rewards/symbolic_reward_partial_score/mean": 0.8323567509651184, |
| "rewards/symbolic_reward_partial_score/std": 0.27279549837112427, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.178039789199829, |
| "sampling/importance_sampling_ratio/min": 0.0019275805680081248, |
| "sampling/sampling_logp_difference/max": 6.251489639282227, |
| "sampling/sampling_logp_difference/mean": 0.21803206205368042, |
| "step": 89 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.140625, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.203125, |
| "entropy": 0.41286924481391907, |
| "epoch": 0.25862068965517243, |
| "grad_norm": 0.0021748561412096024, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 90 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.0859375, |
| "clip_ratio/low_mean": 0.109375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1953125, |
| "entropy": 0.4246648848056793, |
| "epoch": 0.2614942528735632, |
| "grad_norm": 0.0018133048433810472, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "step": 91 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1171875, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1640625, |
| "entropy": 0.4122104048728943, |
| "epoch": 0.26436781609195403, |
| "grad_norm": 0.002096879994496703, |
| "learning_rate": 1e-05, |
| "loss": -0.0004, |
| "step": 92 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 100.0, |
| "completions/max_terminated_length": 100.0, |
| "completions/mean_length": 56.546875, |
| "completions/mean_terminated_length": 56.546875, |
| "completions/min_length": 39.0, |
| "completions/min_terminated_length": 39.0, |
| "entropy": 0.42113952338695526, |
| "epoch": 0.2672413793103448, |
| "frac_reward_zero_std": 0.4375, |
| "grad_norm": 0.0021440223790705204, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "num_tokens": 10848858.0, |
| "reward": 0.8250000476837158, |
| "reward_std": 0.11331714689731598, |
| "rewards/ngram_repetition2/mean": 0.0, |
| "rewards/ngram_repetition2/std": 0.0, |
| "rewards/ngram_repetition3/mean": 0.0, |
| "rewards/ngram_repetition3/std": 0.0, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.791015625, |
| "rewards/symbolic_reward_accuracy/std": 0.40698084235191345, |
| "rewards/symbolic_reward_partial_score/mean": 0.904296875, |
| "rewards/symbolic_reward_partial_score/std": 0.24538351595401764, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1647822856903076, |
| "sampling/importance_sampling_ratio/min": 0.00526386359706521, |
| "sampling/sampling_logp_difference/max": 5.246890068054199, |
| "sampling/sampling_logp_difference/mean": 0.20634809136390686, |
| "step": 93 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.140625, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1875, |
| "entropy": 0.4066851735115051, |
| "epoch": 0.27011494252873564, |
| "grad_norm": 0.0021633717697113752, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 94 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.15625, |
| "clip_ratio/low_mean": 0.0234375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1796875, |
| "entropy": 0.3979349434375763, |
| "epoch": 0.27298850574712646, |
| "grad_norm": 0.0010904585942626, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 95 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.171875, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2109375, |
| "entropy": 0.40239329636096954, |
| "epoch": 0.27586206896551724, |
| "grad_norm": 0.0020343970973044634, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 96 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 93.0, |
| "completions/max_terminated_length": 93.0, |
| "completions/mean_length": 55.3828125, |
| "completions/mean_terminated_length": 55.3828125, |
| "completions/min_length": 37.0, |
| "completions/min_terminated_length": 37.0, |
| "entropy": 0.41389578580856323, |
| "epoch": 0.27873563218390807, |
| "frac_reward_zero_std": 0.34375, |
| "grad_norm": 0.0023732734844088554, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "num_tokens": 11286334.0, |
| "reward": 0.762939453125, |
| "reward_std": 0.13904184103012085, |
| "rewards/ngram_repetition2/mean": 0.0, |
| "rewards/ngram_repetition2/std": 0.0, |
| "rewards/ngram_repetition3/mean": 0.0, |
| "rewards/ngram_repetition3/std": 0.0, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.708984375, |
| "rewards/symbolic_reward_accuracy/std": 0.45467492938041687, |
| "rewards/symbolic_reward_partial_score/mean": 0.8888345956802368, |
| "rewards/symbolic_reward_partial_score/std": 0.23015637695789337, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1618776321411133, |
| "sampling/importance_sampling_ratio/min": 0.004022667650133371, |
| "sampling/sampling_logp_difference/max": 5.515810012817383, |
| "sampling/sampling_logp_difference/mean": 0.2021733969449997, |
| "step": 97 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1640625, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2265625, |
| "entropy": 0.3943755477666855, |
| "epoch": 0.28160919540229884, |
| "grad_norm": 0.0024514112155884504, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "step": 98 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1640625, |
| "clip_ratio/low_mean": 0.125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2890625, |
| "entropy": 0.374920517206192, |
| "epoch": 0.28448275862068967, |
| "grad_norm": 0.0016932882135733962, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 99 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1484375, |
| "clip_ratio/low_mean": 0.125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2734375, |
| "entropy": 0.41551171243190765, |
| "epoch": 0.28735632183908044, |
| "grad_norm": 0.001992169301956892, |
| "learning_rate": 1e-05, |
| "loss": -0.0004, |
| "step": 100 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 97.0, |
| "completions/max_terminated_length": 97.0, |
| "completions/mean_length": 56.2109375, |
| "completions/mean_terminated_length": 56.2109375, |
| "completions/min_length": 36.0, |
| "completions/min_terminated_length": 36.0, |
| "entropy": 0.39603303372859955, |
| "epoch": 0.29022988505747127, |
| "frac_reward_zero_std": 0.4375, |
| "grad_norm": 0.0020668664947152138, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "num_tokens": 11720906.0, |
| "reward": 0.751953125, |
| "reward_std": 0.11652664840221405, |
| "rewards/ngram_repetition2/mean": 0.0, |
| "rewards/ngram_repetition2/std": 0.0, |
| "rewards/ngram_repetition3/mean": 0.0, |
| "rewards/ngram_repetition3/std": 0.0, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.701171875, |
| "rewards/symbolic_reward_accuracy/std": 0.45819199085235596, |
| "rewards/symbolic_reward_partial_score/mean": 0.8704427480697632, |
| "rewards/symbolic_reward_partial_score/std": 0.24692247807979584, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.166778326034546, |
| "sampling/importance_sampling_ratio/min": 0.005492182448506355, |
| "sampling/sampling_logp_difference/max": 5.204429626464844, |
| "sampling/sampling_logp_difference/mean": 0.20504958927631378, |
| "step": 101 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.15625, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.21875, |
| "entropy": 0.42469410598278046, |
| "epoch": 0.29310344827586204, |
| "grad_norm": 0.0026059469673782587, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 102 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1875, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.25, |
| "entropy": 0.4196999818086624, |
| "epoch": 0.2959770114942529, |
| "grad_norm": 0.0022485863883048296, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 103 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.171875, |
| "entropy": 0.4103551357984543, |
| "epoch": 0.2988505747126437, |
| "grad_norm": 0.0018114866688847542, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 104 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 117.0, |
| "completions/max_terminated_length": 117.0, |
| "completions/mean_length": 58.57421875, |
| "completions/mean_terminated_length": 58.57421875, |
| "completions/min_length": 36.0, |
| "completions/min_terminated_length": 36.0, |
| "entropy": 0.46819332242012024, |
| "epoch": 0.3017241379310345, |
| "frac_reward_zero_std": 0.59375, |
| "grad_norm": 0.0030483694281429052, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "num_tokens": 12168720.0, |
| "reward": 0.741650402545929, |
| "reward_std": 0.09308037161827087, |
| "rewards/ngram_repetition2/mean": 0.0, |
| "rewards/ngram_repetition2/std": 0.0, |
| "rewards/ngram_repetition3/mean": 0.0, |
| "rewards/ngram_repetition3/std": 0.0, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.6875, |
| "rewards/symbolic_reward_accuracy/std": 0.4639657139778137, |
| "rewards/symbolic_reward_partial_score/mean": 0.8680012822151184, |
| "rewards/symbolic_reward_partial_score/std": 0.25573408603668213, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1807539463043213, |
| "sampling/importance_sampling_ratio/min": 0.0036130903754383326, |
| "sampling/sampling_logp_difference/max": 5.623191833496094, |
| "sampling/sampling_logp_difference/mean": 0.22634021937847137, |
| "step": 105 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.09375, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1640625, |
| "entropy": 0.447001576423645, |
| "epoch": 0.3045977011494253, |
| "grad_norm": 0.0014942652778699994, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 106 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.0859375, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.140625, |
| "entropy": 0.420550674200058, |
| "epoch": 0.3074712643678161, |
| "grad_norm": 0.001921923947520554, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 107 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.078125, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1484375, |
| "entropy": 0.4742784798145294, |
| "epoch": 0.3103448275862069, |
| "grad_norm": 0.0013419737806543708, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "step": 108 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 108.0, |
| "completions/max_terminated_length": 108.0, |
| "completions/mean_length": 58.876953125, |
| "completions/mean_terminated_length": 58.876953125, |
| "completions/min_length": 40.0, |
| "completions/min_terminated_length": 40.0, |
| "entropy": 0.4446108043193817, |
| "epoch": 0.3132183908045977, |
| "frac_reward_zero_std": 0.4375, |
| "grad_norm": 0.0027625032234936953, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "num_tokens": 12607825.0, |
| "reward": 0.7533203363418579, |
| "reward_std": 0.1126197874546051, |
| "rewards/ngram_repetition2/mean": 0.0, |
| "rewards/ngram_repetition2/std": 0.0, |
| "rewards/ngram_repetition3/mean": 0.0, |
| "rewards/ngram_repetition3/std": 0.0, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.70703125, |
| "rewards/symbolic_reward_accuracy/std": 0.455569326877594, |
| "rewards/symbolic_reward_partial_score/mean": 0.861328125, |
| "rewards/symbolic_reward_partial_score/std": 0.29597893357276917, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1799287796020508, |
| "sampling/importance_sampling_ratio/min": 0.006384609267115593, |
| "sampling/sampling_logp_difference/max": 5.0538649559021, |
| "sampling/sampling_logp_difference/mean": 0.22297433018684387, |
| "step": 109 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.15625, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1953125, |
| "entropy": 0.43454255163669586, |
| "epoch": 0.3160919540229885, |
| "grad_norm": 0.0020271167159080505, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 110 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2109375, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2734375, |
| "entropy": 0.4507320821285248, |
| "epoch": 0.31896551724137934, |
| "grad_norm": 0.0015110382810235023, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 111 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.140625, |
| "clip_ratio/low_mean": 0.09375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.234375, |
| "entropy": 0.4569307267665863, |
| "epoch": 0.3218390804597701, |
| "grad_norm": 0.002051499206572771, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 112 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 162.0, |
| "completions/max_terminated_length": 162.0, |
| "completions/mean_length": 61.46484375, |
| "completions/mean_terminated_length": 61.46484375, |
| "completions/min_length": 34.0, |
| "completions/min_terminated_length": 34.0, |
| "entropy": 0.5046162307262421, |
| "epoch": 0.32471264367816094, |
| "frac_reward_zero_std": 0.5, |
| "grad_norm": 0.003583670826628804, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "num_tokens": 13056959.0, |
| "reward": 0.7070310115814209, |
| "reward_std": 0.09193491190671921, |
| "rewards/ngram_repetition2/mean": -1.3152356586942915e-05, |
| "rewards/ngram_repetition2/std": 0.00029760386678390205, |
| "rewards/ngram_repetition3/mean": -1.181027982966043e-05, |
| "rewards/ngram_repetition3/std": 0.0002672361151780933, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.646484375, |
| "rewards/symbolic_reward_accuracy/std": 0.47852855920791626, |
| "rewards/symbolic_reward_partial_score/mean": 0.8483072519302368, |
| "rewards/symbolic_reward_partial_score/std": 0.29286909103393555, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1988115310668945, |
| "sampling/importance_sampling_ratio/min": 0.0016131963348016143, |
| "sampling/sampling_logp_difference/max": 6.429537773132324, |
| "sampling/sampling_logp_difference/mean": 0.24746036529541016, |
| "step": 113 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.0859375, |
| "clip_ratio/low_mean": 0.015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1015625, |
| "entropy": 0.49996377527713776, |
| "epoch": 0.3275862068965517, |
| "grad_norm": 0.001245135790668428, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 114 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.15625, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1953125, |
| "entropy": 0.5030421316623688, |
| "epoch": 0.33045977011494254, |
| "grad_norm": 0.003247750224545598, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 115 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1484375, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1875, |
| "entropy": 0.5156670063734055, |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.00133526383433491, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 116 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 10776.0, |
| "completions/max_terminated_length": 10776.0, |
| "completions/mean_length": 86.06640625, |
| "completions/mean_terminated_length": 86.06640625, |
| "completions/min_length": 41.0, |
| "completions/min_terminated_length": 41.0, |
| "entropy": 0.5009976476430893, |
| "epoch": 0.33620689655172414, |
| "frac_reward_zero_std": 0.375, |
| "grad_norm": 0.009771936573088169, |
| "learning_rate": 1e-05, |
| "loss": 0.0198, |
| "num_tokens": 13515681.0, |
| "reward": 0.8136388063430786, |
| "reward_std": 0.09995077550411224, |
| "rewards/ngram_repetition2/mean": -0.001687212847173214, |
| "rewards/ngram_repetition2/std": 0.03696763888001442, |
| "rewards/ngram_repetition3/mean": -0.0016231336630880833, |
| "rewards/ngram_repetition3/std": 0.036507681012153625, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.7734375, |
| "rewards/symbolic_reward_accuracy/std": 0.4190165400505066, |
| "rewards/symbolic_reward_partial_score/mean": 0.908203125, |
| "rewards/symbolic_reward_partial_score/std": 0.21997693181037903, |
| "rewards/tag_count_reward/mean": -0.001953125, |
| "rewards/tag_count_reward/std": 0.04419417306780815, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2038259506225586, |
| "sampling/importance_sampling_ratio/min": 8.807555423118174e-05, |
| "sampling/sampling_logp_difference/max": 9.337315559387207, |
| "sampling/sampling_logp_difference/mean": 0.246160626411438, |
| "step": 117 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1953125, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2265625, |
| "entropy": 0.5377438366413116, |
| "epoch": 0.3390804597701149, |
| "grad_norm": 0.0017847833223640919, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 118 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.1328125, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1640625, |
| "entropy": 0.5264367163181305, |
| "epoch": 0.34195402298850575, |
| "grad_norm": 0.003602989250794053, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 119 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.0859375, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.15625, |
| "entropy": 0.5324976742267609, |
| "epoch": 0.3448275862068966, |
| "grad_norm": 0.002521625952795148, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 120 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 170.0, |
| "completions/max_terminated_length": 170.0, |
| "completions/mean_length": 70.751953125, |
| "completions/mean_terminated_length": 70.751953125, |
| "completions/min_length": 37.0, |
| "completions/min_terminated_length": 37.0, |
| "entropy": 0.6152905225753784, |
| "epoch": 0.34770114942528735, |
| "frac_reward_zero_std": 0.375, |
| "grad_norm": 0.0035737582948058844, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "num_tokens": 13938850.0, |
| "reward": 0.9204086065292358, |
| "reward_std": 0.13115757703781128, |
| "rewards/ngram_repetition2/mean": -0.0001035748136928305, |
| "rewards/ngram_repetition2/std": 0.0023436304181814194, |
| "rewards/ngram_repetition3/mean": -5.3146257414482534e-05, |
| "rewards/ngram_repetition3/std": 0.001106699462980032, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.8984375, |
| "rewards/symbolic_reward_accuracy/std": 0.30236753821372986, |
| "rewards/symbolic_reward_partial_score/mean": 0.9716796875, |
| "rewards/symbolic_reward_partial_score/std": 0.1532812863588333, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2282681465148926, |
| "sampling/importance_sampling_ratio/min": 0.0026613196823745966, |
| "sampling/sampling_logp_difference/max": 5.928933143615723, |
| "sampling/sampling_logp_difference/mean": 0.27953964471817017, |
| "step": 121 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1640625, |
| "entropy": 0.6114228069782257, |
| "epoch": 0.3505747126436782, |
| "grad_norm": 0.0028497313614934683, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 122 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.1171875, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.15625, |
| "entropy": 0.6111920773983002, |
| "epoch": 0.35344827586206895, |
| "grad_norm": 0.0034871206153184175, |
| "learning_rate": 1e-05, |
| "loss": -0.0005, |
| "step": 123 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.09375, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1796875, |
| "entropy": 0.5921016335487366, |
| "epoch": 0.3563218390804598, |
| "grad_norm": 0.0030272386502474546, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "step": 124 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 166.0, |
| "completions/max_terminated_length": 166.0, |
| "completions/mean_length": 73.875, |
| "completions/mean_terminated_length": 73.875, |
| "completions/min_length": 43.0, |
| "completions/min_terminated_length": 43.0, |
| "entropy": 0.5922946929931641, |
| "epoch": 0.35919540229885055, |
| "frac_reward_zero_std": 0.4375, |
| "grad_norm": 0.005635711830109358, |
| "learning_rate": 1e-05, |
| "loss": 0.0008, |
| "num_tokens": 14404162.0, |
| "reward": 0.766205906867981, |
| "reward_std": 0.08414055407047272, |
| "rewards/ngram_repetition2/mean": -0.0003033262328244746, |
| "rewards/ngram_repetition2/std": 0.004093645140528679, |
| "rewards/ngram_repetition3/mean": -0.000197924004169181, |
| "rewards/ngram_repetition3/std": 0.0027138942386955023, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.716796875, |
| "rewards/symbolic_reward_accuracy/std": 0.4509948492050171, |
| "rewards/symbolic_reward_partial_score/mean": 0.8815103769302368, |
| "rewards/symbolic_reward_partial_score/std": 0.22402197122573853, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.238568663597107, |
| "sampling/importance_sampling_ratio/min": 0.0008153519011102617, |
| "sampling/sampling_logp_difference/max": 7.11189079284668, |
| "sampling/sampling_logp_difference/mean": 0.29386207461357117, |
| "step": 125 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.1328125, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1640625, |
| "entropy": 0.6237926781177521, |
| "epoch": 0.3620689655172414, |
| "grad_norm": 0.0012159041361883283, |
| "learning_rate": 1e-05, |
| "loss": 0.0005, |
| "step": 126 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1875, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.21875, |
| "entropy": 0.6169875264167786, |
| "epoch": 0.3649425287356322, |
| "grad_norm": 0.0013761859154328704, |
| "learning_rate": 1e-05, |
| "loss": -0.0004, |
| "step": 127 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.0234375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1484375, |
| "entropy": 0.6234523355960846, |
| "epoch": 0.367816091954023, |
| "grad_norm": 0.0022137663327157497, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 128 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 158.0, |
| "completions/max_terminated_length": 158.0, |
| "completions/mean_length": 72.220703125, |
| "completions/mean_terminated_length": 72.220703125, |
| "completions/min_length": 41.0, |
| "completions/min_terminated_length": 41.0, |
| "entropy": 0.642326831817627, |
| "epoch": 0.3706896551724138, |
| "frac_reward_zero_std": 0.25, |
| "grad_norm": 0.004551479127258062, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "num_tokens": 14868307.0, |
| "reward": 0.7379156351089478, |
| "reward_std": 0.1207510381937027, |
| "rewards/ngram_repetition2/mean": -0.0011725000804290175, |
| "rewards/ngram_repetition2/std": 0.011009343899786472, |
| "rewards/ngram_repetition3/mean": -0.001207483932375908, |
| "rewards/ngram_repetition3/std": 0.011307465843856335, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.677734375, |
| "rewards/symbolic_reward_accuracy/std": 0.46780112385749817, |
| "rewards/symbolic_reward_partial_score/mean": 0.87841796875, |
| "rewards/symbolic_reward_partial_score/std": 0.2543152868747711, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2485902309417725, |
| "sampling/importance_sampling_ratio/min": 0.005005154758691788, |
| "sampling/sampling_logp_difference/max": 5.2972869873046875, |
| "sampling/sampling_logp_difference/mean": 0.30063968896865845, |
| "step": 129 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.140625, |
| "clip_ratio/low_mean": 0.078125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.21875, |
| "entropy": 0.6359215378761292, |
| "epoch": 0.3735632183908046, |
| "grad_norm": 0.003405208932235837, |
| "learning_rate": 1e-05, |
| "loss": 0.0007, |
| "step": 130 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.234375, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3046875, |
| "entropy": 0.6392008662223816, |
| "epoch": 0.3764367816091954, |
| "grad_norm": 0.0017990770284086466, |
| "learning_rate": 1e-05, |
| "loss": -0.0005, |
| "step": 131 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.203125, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2890625, |
| "entropy": 0.5917164087295532, |
| "epoch": 0.3793103448275862, |
| "grad_norm": 0.0019928663969039917, |
| "learning_rate": 1e-05, |
| "loss": -0.0004, |
| "step": 132 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 173.0, |
| "completions/max_terminated_length": 173.0, |
| "completions/mean_length": 73.037109375, |
| "completions/mean_terminated_length": 73.037109375, |
| "completions/min_length": 42.0, |
| "completions/min_terminated_length": 42.0, |
| "entropy": 0.6280420422554016, |
| "epoch": 0.382183908045977, |
| "frac_reward_zero_std": 0.28125, |
| "grad_norm": 0.005952598061412573, |
| "learning_rate": 1e-05, |
| "loss": 0.0005, |
| "num_tokens": 15327014.0, |
| "reward": 0.7588248252868652, |
| "reward_std": 0.10919959098100662, |
| "rewards/ngram_repetition2/mean": -0.0005840057274326682, |
| "rewards/ngram_repetition2/std": 0.008051936514675617, |
| "rewards/ngram_repetition3/mean": -0.0007190246833488345, |
| "rewards/ngram_repetition3/std": 0.008538857102394104, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.712890625, |
| "rewards/symbolic_reward_accuracy/std": 0.45285552740097046, |
| "rewards/symbolic_reward_partial_score/mean": 0.86669921875, |
| "rewards/symbolic_reward_partial_score/std": 0.27006658911705017, |
| "rewards/tag_count_reward/mean": -0.001953125, |
| "rewards/tag_count_reward/std": 0.04419417306780815, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.249870777130127, |
| "sampling/importance_sampling_ratio/min": 0.0022422403562813997, |
| "sampling/sampling_logp_difference/max": 6.100279808044434, |
| "sampling/sampling_logp_difference/mean": 0.2998029589653015, |
| "step": 133 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.203125, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.234375, |
| "entropy": 0.6212248504161835, |
| "epoch": 0.3850574712643678, |
| "grad_norm": 0.003057986032217741, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 134 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.21875, |
| "clip_ratio/low_mean": 0.078125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.296875, |
| "entropy": 0.6091723740100861, |
| "epoch": 0.3879310344827586, |
| "grad_norm": 0.0012814750662073493, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 135 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1640625, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2109375, |
| "entropy": 0.6359426975250244, |
| "epoch": 0.39080459770114945, |
| "grad_norm": 0.0027223494835197926, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "step": 136 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 129.0, |
| "completions/max_terminated_length": 129.0, |
| "completions/mean_length": 68.91015625, |
| "completions/mean_terminated_length": 68.91015625, |
| "completions/min_length": 28.0, |
| "completions/min_terminated_length": 28.0, |
| "entropy": 0.6074822843074799, |
| "epoch": 0.3936781609195402, |
| "frac_reward_zero_std": 0.40625, |
| "grad_norm": 0.0033188818488270044, |
| "learning_rate": 1e-05, |
| "loss": 0.0005, |
| "num_tokens": 15783608.0, |
| "reward": 0.7168921232223511, |
| "reward_std": 0.11317743360996246, |
| "rewards/ngram_repetition2/mean": -0.00015996501315385103, |
| "rewards/ngram_repetition2/std": 0.001963542541489005, |
| "rewards/ngram_repetition3/mean": -8.46942639327608e-05, |
| "rewards/ngram_repetition3/std": 0.001818765769712627, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.646484375, |
| "rewards/symbolic_reward_accuracy/std": 0.47852855920791626, |
| "rewards/symbolic_reward_partial_score/mean": 0.8811848759651184, |
| "rewards/symbolic_reward_partial_score/std": 0.21943449974060059, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2335309982299805, |
| "sampling/importance_sampling_ratio/min": 0.00211329129524529, |
| "sampling/sampling_logp_difference/max": 6.15950870513916, |
| "sampling/sampling_logp_difference/mean": 0.281631737947464, |
| "step": 137 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1640625, |
| "entropy": 0.6190885007381439, |
| "epoch": 0.39655172413793105, |
| "grad_norm": 0.001330671482719481, |
| "learning_rate": 1e-05, |
| "loss": -0.0006, |
| "step": 138 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1640625, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2265625, |
| "entropy": 0.6273294687271118, |
| "epoch": 0.3994252873563218, |
| "grad_norm": 0.001271542045287788, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 139 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.140625, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1953125, |
| "entropy": 0.6308173537254333, |
| "epoch": 0.40229885057471265, |
| "grad_norm": 0.0031829047948122025, |
| "learning_rate": 1e-05, |
| "loss": 0.0006, |
| "step": 140 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 177.0, |
| "completions/max_terminated_length": 177.0, |
| "completions/mean_length": 68.35546875, |
| "completions/mean_terminated_length": 68.35546875, |
| "completions/min_length": 28.0, |
| "completions/min_terminated_length": 28.0, |
| "entropy": 0.6132214367389679, |
| "epoch": 0.4051724137931034, |
| "frac_reward_zero_std": 0.40625, |
| "grad_norm": 0.003619483206421137, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "num_tokens": 16214894.0, |
| "reward": 0.9032166004180908, |
| "reward_std": 0.11951880156993866, |
| "rewards/ngram_repetition2/mean": -0.00033016284578479826, |
| "rewards/ngram_repetition2/std": 0.005208797287195921, |
| "rewards/ngram_repetition3/mean": -0.00027478154515847564, |
| "rewards/ngram_repetition3/std": 0.004721499979496002, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.884765625, |
| "rewards/symbolic_reward_accuracy/std": 0.3196168541908264, |
| "rewards/symbolic_reward_partial_score/mean": 0.9462890625, |
| "rewards/symbolic_reward_partial_score/std": 0.1916707307100296, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2291423082351685, |
| "sampling/importance_sampling_ratio/min": 0.003642383264377713, |
| "sampling/sampling_logp_difference/max": 5.615117073059082, |
| "sampling/sampling_logp_difference/mean": 0.28253355622291565, |
| "step": 141 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.21875, |
| "entropy": 0.6177562177181244, |
| "epoch": 0.40804597701149425, |
| "grad_norm": 0.002994804410263896, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "step": 142 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.15625, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1875, |
| "entropy": 0.6419652104377747, |
| "epoch": 0.4109195402298851, |
| "grad_norm": 0.00205876212567091, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 143 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1328125, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1796875, |
| "entropy": 0.6255612373352051, |
| "epoch": 0.41379310344827586, |
| "grad_norm": 0.0019418266601860523, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 144 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 125.0, |
| "completions/max_terminated_length": 125.0, |
| "completions/mean_length": 67.984375, |
| "completions/mean_terminated_length": 67.984375, |
| "completions/min_length": 28.0, |
| "completions/min_terminated_length": 28.0, |
| "entropy": 0.6153721511363983, |
| "epoch": 0.4166666666666667, |
| "frac_reward_zero_std": 0.40625, |
| "grad_norm": 0.0032779767643660307, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "num_tokens": 16680358.0, |
| "reward": 0.7255349159240723, |
| "reward_std": 0.09427813440561295, |
| "rewards/ngram_repetition2/mean": -5.109919948154129e-05, |
| "rewards/ngram_repetition2/std": 0.0011562429135665298, |
| "rewards/ngram_repetition3/mean": -0.00016613237676210701, |
| "rewards/ngram_repetition3/std": 0.0023371358402073383, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.65234375, |
| "rewards/symbolic_reward_accuracy/std": 0.47669193148612976, |
| "rewards/symbolic_reward_partial_score/mean": 0.8963215947151184, |
| "rewards/symbolic_reward_partial_score/std": 0.2021331936120987, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2260642051696777, |
| "sampling/importance_sampling_ratio/min": 0.0031624305993318558, |
| "sampling/sampling_logp_difference/max": 5.756414413452148, |
| "sampling/sampling_logp_difference/mean": 0.2714301347732544, |
| "step": 145 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1171875, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.203125, |
| "entropy": 0.5929703116416931, |
| "epoch": 0.41954022988505746, |
| "grad_norm": 0.0029908197466284037, |
| "learning_rate": 1e-05, |
| "loss": 0.0005, |
| "step": 146 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.109375, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1640625, |
| "entropy": 0.5892519950866699, |
| "epoch": 0.4224137931034483, |
| "grad_norm": 0.0029139199759811163, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 147 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1015625, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.15625, |
| "entropy": 0.6122852563858032, |
| "epoch": 0.42528735632183906, |
| "grad_norm": 0.0019851247780025005, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 148 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.001953125, |
| "completions/max_length": 16384.0, |
| "completions/max_terminated_length": 151.0, |
| "completions/mean_length": 98.384765625, |
| "completions/mean_terminated_length": 66.51467895507812, |
| "completions/min_length": 28.0, |
| "completions/min_terminated_length": 28.0, |
| "entropy": 0.5971274673938751, |
| "epoch": 0.4281609195402299, |
| "frac_reward_zero_std": 0.25, |
| "grad_norm": 0.005040820688009262, |
| "learning_rate": 1e-05, |
| "loss": -0.0004, |
| "num_tokens": 17148555.0, |
| "reward": 0.7448210120201111, |
| "reward_std": 0.1062232255935669, |
| "rewards/ngram_repetition2/mean": -0.0026194120291620493, |
| "rewards/ngram_repetition2/std": 0.042374733835458755, |
| "rewards/ngram_repetition3/mean": -0.002586992457509041, |
| "rewards/ngram_repetition3/std": 0.04207787662744522, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.697265625, |
| "rewards/symbolic_reward_accuracy/std": 0.45989060401916504, |
| "rewards/symbolic_reward_partial_score/mean": 0.85595703125, |
| "rewards/symbolic_reward_partial_score/std": 0.25713518261909485, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2251850366592407, |
| "sampling/importance_sampling_ratio/min": 0.0008897649240680039, |
| "sampling/sampling_logp_difference/max": 7.024553298950195, |
| "sampling/sampling_logp_difference/mean": 0.27212345600128174, |
| "step": 149 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2734375, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3203125, |
| "entropy": 0.5970511436462402, |
| "epoch": 0.43103448275862066, |
| "grad_norm": 0.0023280696477741003, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 150 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.203125, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.265625, |
| "entropy": 0.6188189387321472, |
| "epoch": 0.4339080459770115, |
| "grad_norm": 0.003590863663703203, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "step": 151 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1875, |
| "entropy": 0.619499146938324, |
| "epoch": 0.4367816091954023, |
| "grad_norm": 0.0035510375164449215, |
| "learning_rate": 1e-05, |
| "loss": 0.0295, |
| "step": 152 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0078125, |
| "completions/max_length": 16384.0, |
| "completions/max_terminated_length": 3966.0, |
| "completions/mean_length": 199.205078125, |
| "completions/mean_terminated_length": 71.7657470703125, |
| "completions/min_length": 35.0, |
| "completions/min_terminated_length": 35.0, |
| "entropy": 0.5717917680740356, |
| "epoch": 0.4396551724137931, |
| "frac_reward_zero_std": 0.15625, |
| "grad_norm": 0.004968932364135981, |
| "learning_rate": 1e-05, |
| "loss": -0.001, |
| "num_tokens": 17665524.0, |
| "reward": 0.7633627653121948, |
| "reward_std": 0.09578961879014969, |
| "rewards/ngram_repetition2/mean": -0.010589256882667542, |
| "rewards/ngram_repetition2/std": 0.09524083882570267, |
| "rewards/ngram_repetition3/mean": -0.010552708059549332, |
| "rewards/ngram_repetition3/std": 0.0955950990319252, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.712890625, |
| "rewards/symbolic_reward_accuracy/std": 0.45285552740097046, |
| "rewards/symbolic_reward_partial_score/mean": 0.8818359375, |
| "rewards/symbolic_reward_partial_score/std": 0.2471448928117752, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1987650394439697, |
| "sampling/importance_sampling_ratio/min": 0.002070514252409339, |
| "sampling/sampling_logp_difference/max": 6.179958343505859, |
| "sampling/sampling_logp_difference/mean": 0.23549169301986694, |
| "step": 153 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.203125, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.234375, |
| "entropy": 0.57914999127388, |
| "epoch": 0.4425287356321839, |
| "grad_norm": 0.0037953564897179604, |
| "learning_rate": 1e-05, |
| "loss": 0.0279, |
| "step": 154 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2734375, |
| "clip_ratio/low_mean": 0.0234375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.296875, |
| "entropy": 0.6249216198921204, |
| "epoch": 0.4454022988505747, |
| "grad_norm": 0.0024858491960912943, |
| "learning_rate": 1e-05, |
| "loss": 0.0066, |
| "step": 155 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1953125, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.28125, |
| "entropy": 0.5942817330360413, |
| "epoch": 0.4482758620689655, |
| "grad_norm": 0.005163618829101324, |
| "learning_rate": 1e-05, |
| "loss": 0.0294, |
| "step": 156 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 117.0, |
| "completions/max_terminated_length": 117.0, |
| "completions/mean_length": 60.4375, |
| "completions/mean_terminated_length": 60.4375, |
| "completions/min_length": 38.0, |
| "completions/min_terminated_length": 38.0, |
| "entropy": 0.5676226019859314, |
| "epoch": 0.4511494252873563, |
| "frac_reward_zero_std": 0.40625, |
| "grad_norm": 0.003438665997236967, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "num_tokens": 18102260.0, |
| "reward": 0.7589313983917236, |
| "reward_std": 0.0913117527961731, |
| "rewards/ngram_repetition2/mean": -0.00015224021626636386, |
| "rewards/ngram_repetition2/std": 0.0017761130584403872, |
| "rewards/ngram_repetition3/mean": -0.0002660206810105592, |
| "rewards/ngram_repetition3/std": 0.0028565367683768272, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.6953125, |
| "rewards/symbolic_reward_accuracy/std": 0.4607250988483429, |
| "rewards/symbolic_reward_partial_score/mean": 0.9073892831802368, |
| "rewards/symbolic_reward_partial_score/std": 0.19136381149291992, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2238843441009521, |
| "sampling/importance_sampling_ratio/min": 0.004014967940747738, |
| "sampling/sampling_logp_difference/max": 5.517725944519043, |
| "sampling/sampling_logp_difference/mean": 0.2637117803096771, |
| "step": 157 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1484375, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1875, |
| "entropy": 0.5621353983879089, |
| "epoch": 0.4540229885057471, |
| "grad_norm": 0.00312551436945796, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 158 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2421875, |
| "entropy": 0.5782199203968048, |
| "epoch": 0.45689655172413796, |
| "grad_norm": 0.0011367530096322298, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 159 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1171875, |
| "clip_ratio/low_mean": 0.078125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1953125, |
| "entropy": 0.5718804597854614, |
| "epoch": 0.45977011494252873, |
| "grad_norm": 0.0023252142127603292, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "step": 160 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 101.0, |
| "completions/max_terminated_length": 101.0, |
| "completions/mean_length": 56.208984375, |
| "completions/mean_terminated_length": 56.208984375, |
| "completions/min_length": 28.0, |
| "completions/min_terminated_length": 28.0, |
| "entropy": 0.4680989980697632, |
| "epoch": 0.46264367816091956, |
| "frac_reward_zero_std": 0.3125, |
| "grad_norm": 0.004628314170986414, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "num_tokens": 18558047.0, |
| "reward": 0.7402796745300293, |
| "reward_std": 0.11135189980268478, |
| "rewards/ngram_repetition2/mean": -0.00023176189279183745, |
| "rewards/ngram_repetition2/std": 0.003024648642167449, |
| "rewards/ngram_repetition3/mean": -0.00011768023250624537, |
| "rewards/ngram_repetition3/std": 0.00259247119538486, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.6796875, |
| "rewards/symbolic_reward_accuracy/std": 0.4670529365539551, |
| "rewards/symbolic_reward_partial_score/mean": 0.8816731572151184, |
| "rewards/symbolic_reward_partial_score/std": 0.23182804882526398, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1849548816680908, |
| "sampling/importance_sampling_ratio/min": 0.0010554436594247818, |
| "sampling/sampling_logp_difference/max": 6.853794097900391, |
| "sampling/sampling_logp_difference/mean": 0.2266506552696228, |
| "step": 161 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1171875, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1640625, |
| "entropy": 0.4583146572113037, |
| "epoch": 0.46551724137931033, |
| "grad_norm": 0.0011460937093943357, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 162 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.203125, |
| "clip_ratio/low_mean": 0.1015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3046875, |
| "entropy": 0.4420630782842636, |
| "epoch": 0.46839080459770116, |
| "grad_norm": 0.0012833460932597518, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 163 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.0546875, |
| "clip_ratio/low_mean": 0.109375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1640625, |
| "entropy": 0.4771575480699539, |
| "epoch": 0.47126436781609193, |
| "grad_norm": 0.0012788056628778577, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 164 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 101.0, |
| "completions/max_terminated_length": 101.0, |
| "completions/mean_length": 54.5, |
| "completions/mean_terminated_length": 54.5, |
| "completions/min_length": 6.0, |
| "completions/min_terminated_length": 6.0, |
| "entropy": 0.46562887728214264, |
| "epoch": 0.47413793103448276, |
| "frac_reward_zero_std": 0.1875, |
| "grad_norm": 0.0049250805750489235, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "num_tokens": 19006783.0, |
| "reward": 0.7197023630142212, |
| "reward_std": 0.1307801455259323, |
| "rewards/ngram_repetition2/mean": -0.001037043984979391, |
| "rewards/ngram_repetition2/std": 0.012370138429105282, |
| "rewards/ngram_repetition3/mean": -0.0013850650284439325, |
| "rewards/ngram_repetition3/std": 0.013737454079091549, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.66015625, |
| "rewards/symbolic_reward_accuracy/std": 0.4741191864013672, |
| "rewards/symbolic_reward_partial_score/mean": 0.8645833134651184, |
| "rewards/symbolic_reward_partial_score/std": 0.25641369819641113, |
| "rewards/tag_count_reward/mean": -0.017578125, |
| "rewards/tag_count_reward/std": 0.13154059648513794, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1873273849487305, |
| "sampling/importance_sampling_ratio/min": 0.0031211727764457464, |
| "sampling/sampling_logp_difference/max": 5.7695465087890625, |
| "sampling/sampling_logp_difference/mean": 0.22203099727630615, |
| "step": 165 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1640625, |
| "clip_ratio/low_mean": 0.09375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.4372178316116333, |
| "epoch": 0.47701149425287354, |
| "grad_norm": 0.0017901709070429206, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 166 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.1015625, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.171875, |
| "entropy": 0.43625424802303314, |
| "epoch": 0.47988505747126436, |
| "grad_norm": 0.0014502947451546788, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 167 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1640625, |
| "clip_ratio/low_mean": 0.09375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.4362707883119583, |
| "epoch": 0.4827586206896552, |
| "grad_norm": 0.002049398375675082, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 168 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 11593.0, |
| "completions/max_terminated_length": 11593.0, |
| "completions/mean_length": 79.595703125, |
| "completions/mean_terminated_length": 79.595703125, |
| "completions/min_length": 23.0, |
| "completions/min_terminated_length": 23.0, |
| "entropy": 0.4336452931165695, |
| "epoch": 0.48563218390804597, |
| "frac_reward_zero_std": 0.21875, |
| "grad_norm": 0.001363643677905202, |
| "learning_rate": 1e-05, |
| "loss": 0.0174, |
| "num_tokens": 19437648.0, |
| "reward": 0.7321650981903076, |
| "reward_std": 0.10649190843105316, |
| "rewards/ngram_repetition2/mean": -0.0030163757037371397, |
| "rewards/ngram_repetition2/std": 0.04440012574195862, |
| "rewards/ngram_repetition3/mean": -0.003132551908493042, |
| "rewards/ngram_repetition3/std": 0.04444865137338638, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.677734375, |
| "rewards/symbolic_reward_accuracy/std": 0.46780112385749817, |
| "rewards/symbolic_reward_partial_score/mean": 0.861328125, |
| "rewards/symbolic_reward_partial_score/std": 0.2618214786052704, |
| "rewards/tag_count_reward/mean": -0.005859375, |
| "rewards/tag_count_reward/std": 0.07639661431312561, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1761358976364136, |
| "sampling/importance_sampling_ratio/min": 0.0009067317587323487, |
| "sampling/sampling_logp_difference/max": 7.005663871765137, |
| "sampling/sampling_logp_difference/mean": 0.20624347031116486, |
| "step": 169 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2265625, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.4565078616142273, |
| "epoch": 0.4885057471264368, |
| "grad_norm": 0.003753960132598877, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 170 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2265625, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3125, |
| "entropy": 0.4344637244939804, |
| "epoch": 0.49137931034482757, |
| "grad_norm": 0.004187124315649271, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "step": 171 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2109375, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.296875, |
| "entropy": 0.4768785983324051, |
| "epoch": 0.4942528735632184, |
| "grad_norm": 0.0035138626117259264, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 172 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 92.0, |
| "completions/max_terminated_length": 92.0, |
| "completions/mean_length": 55.111328125, |
| "completions/mean_terminated_length": 55.111328125, |
| "completions/min_length": 20.0, |
| "completions/min_terminated_length": 20.0, |
| "entropy": 0.4357730895280838, |
| "epoch": 0.49712643678160917, |
| "frac_reward_zero_std": 0.34375, |
| "grad_norm": 0.004127317573875189, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "num_tokens": 19877673.0, |
| "reward": 0.6550204753875732, |
| "reward_std": 0.09477680921554565, |
| "rewards/ngram_repetition2/mean": -0.000270573771558702, |
| "rewards/ngram_repetition2/std": 0.0034710762556642294, |
| "rewards/ngram_repetition3/mean": -0.0006142433849163353, |
| "rewards/ngram_repetition3/std": 0.006228008773177862, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.587890625, |
| "rewards/symbolic_reward_accuracy/std": 0.49269601702690125, |
| "rewards/symbolic_reward_partial_score/mean": 0.8142903447151184, |
| "rewards/symbolic_reward_partial_score/std": 0.26848551630973816, |
| "rewards/tag_count_reward/mean": -0.0078125, |
| "rewards/tag_count_reward/std": 0.08812850713729858, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1787736415863037, |
| "sampling/importance_sampling_ratio/min": 0.002628255868330598, |
| "sampling/sampling_logp_difference/max": 5.941434860229492, |
| "sampling/sampling_logp_difference/mean": 0.2137116938829422, |
| "step": 173 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.203125, |
| "clip_ratio/low_mean": 0.0234375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2265625, |
| "entropy": 0.42396125197410583, |
| "epoch": 0.5, |
| "grad_norm": 0.001028429134748876, |
| "learning_rate": 1e-05, |
| "loss": -0.0004, |
| "step": 174 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.21875, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.265625, |
| "entropy": 0.4160853624343872, |
| "epoch": 0.5028735632183908, |
| "grad_norm": 0.0022454692516475916, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "step": 175 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.09375, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.140625, |
| "entropy": 0.457041934132576, |
| "epoch": 0.5057471264367817, |
| "grad_norm": 0.001310663647018373, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 176 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 6001.0, |
| "completions/max_terminated_length": 6001.0, |
| "completions/mean_length": 63.87109375, |
| "completions/mean_terminated_length": 63.87109375, |
| "completions/min_length": 34.0, |
| "completions/min_terminated_length": 34.0, |
| "entropy": 0.3979407250881195, |
| "epoch": 0.5086206896551724, |
| "frac_reward_zero_std": 0.28125, |
| "grad_norm": 0.0026170641649514437, |
| "learning_rate": 1e-05, |
| "loss": -0.0004, |
| "num_tokens": 20334055.0, |
| "reward": 0.754780113697052, |
| "reward_std": 0.11056315898895264, |
| "rewards/ngram_repetition2/mean": -0.0019408154767006636, |
| "rewards/ngram_repetition2/std": 0.043002985417842865, |
| "rewards/ngram_repetition3/mean": -0.003445713547989726, |
| "rewards/ngram_repetition3/std": 0.04354758933186531, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.697265625, |
| "rewards/symbolic_reward_accuracy/std": 0.45989060401916504, |
| "rewards/symbolic_reward_partial_score/mean": 0.8898111581802368, |
| "rewards/symbolic_reward_partial_score/std": 0.21633607149124146, |
| "rewards/tag_count_reward/mean": -0.001953125, |
| "rewards/tag_count_reward/std": 0.04419417306780815, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1619974374771118, |
| "sampling/importance_sampling_ratio/min": 0.004802882205694914, |
| "sampling/sampling_logp_difference/max": 5.338539123535156, |
| "sampling/sampling_logp_difference/mean": 0.20070970058441162, |
| "step": 177 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2265625, |
| "clip_ratio/low_mean": 0.109375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3359375, |
| "entropy": 0.3868762254714966, |
| "epoch": 0.5114942528735632, |
| "grad_norm": 0.002501050941646099, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 178 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1328125, |
| "clip_ratio/low_mean": 0.078125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2109375, |
| "entropy": 0.40972326695919037, |
| "epoch": 0.514367816091954, |
| "grad_norm": 0.00537771126255393, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "step": 179 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2421875, |
| "entropy": 0.3921160250902176, |
| "epoch": 0.5172413793103449, |
| "grad_norm": 0.0022035855799913406, |
| "learning_rate": 1e-05, |
| "loss": 0.007, |
| "step": 180 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 11699.0, |
| "completions/max_terminated_length": 11699.0, |
| "completions/mean_length": 75.41015625, |
| "completions/mean_terminated_length": 75.41015625, |
| "completions/min_length": 35.0, |
| "completions/min_terminated_length": 35.0, |
| "entropy": 0.3657400608062744, |
| "epoch": 0.5201149425287356, |
| "frac_reward_zero_std": 0.21875, |
| "grad_norm": 0.004235206637531519, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "num_tokens": 20765945.0, |
| "reward": 0.7215181589126587, |
| "reward_std": 0.15164119005203247, |
| "rewards/ngram_repetition2/mean": -0.0021292921155691147, |
| "rewards/ngram_repetition2/std": 0.042043447494506836, |
| "rewards/ngram_repetition3/mean": -0.004256935324519873, |
| "rewards/ngram_repetition3/std": 0.04291826859116554, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.6640625, |
| "rewards/symbolic_reward_accuracy/std": 0.4727790653705597, |
| "rewards/symbolic_reward_partial_score/mean": 0.8564453125, |
| "rewards/symbolic_reward_partial_score/std": 0.25408750772476196, |
| "rewards/tag_count_reward/mean": -0.001953125, |
| "rewards/tag_count_reward/std": 0.04419417306780815, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.154313325881958, |
| "sampling/importance_sampling_ratio/min": 0.004486561752855778, |
| "sampling/sampling_logp_difference/max": 5.406668663024902, |
| "sampling/sampling_logp_difference/mean": 0.19018931686878204, |
| "step": 181 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.21875, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3046875, |
| "entropy": 0.3666221499443054, |
| "epoch": 0.5229885057471264, |
| "grad_norm": 0.014725334011018276, |
| "learning_rate": 1e-05, |
| "loss": 0.0176, |
| "step": 182 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.1640625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.34375, |
| "entropy": 0.39625655114650726, |
| "epoch": 0.5258620689655172, |
| "grad_norm": 0.00213233451358974, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 183 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.078125, |
| "clip_ratio/low_mean": 0.1875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.265625, |
| "entropy": 0.3968651592731476, |
| "epoch": 0.5287356321839081, |
| "grad_norm": 0.0036237018648535013, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 184 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 86.0, |
| "completions/max_terminated_length": 86.0, |
| "completions/mean_length": 51.23828125, |
| "completions/mean_terminated_length": 51.23828125, |
| "completions/min_length": 35.0, |
| "completions/min_terminated_length": 35.0, |
| "entropy": 0.36812902987003326, |
| "epoch": 0.5316091954022989, |
| "frac_reward_zero_std": 0.0625, |
| "grad_norm": 0.004530012607574463, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "num_tokens": 21201139.0, |
| "reward": 0.6796539425849915, |
| "reward_std": 0.1844376027584076, |
| "rewards/ngram_repetition2/mean": -0.005348730832338333, |
| "rewards/ngram_repetition2/std": 0.026414524763822556, |
| "rewards/ngram_repetition3/mean": -0.012658631429076195, |
| "rewards/ngram_repetition3/std": 0.03263530880212784, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.609375, |
| "rewards/symbolic_reward_accuracy/std": 0.48836761713027954, |
| "rewards/symbolic_reward_partial_score/mean": 0.8468424081802368, |
| "rewards/symbolic_reward_partial_score/std": 0.24985168874263763, |
| "rewards/tag_count_reward/mean": -0.0078125, |
| "rewards/tag_count_reward/std": 0.08812850713729858, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.15095853805542, |
| "sampling/importance_sampling_ratio/min": 0.0017468180740252137, |
| "sampling/sampling_logp_difference/max": 6.349959373474121, |
| "sampling/sampling_logp_difference/mean": 0.1853640079498291, |
| "step": 185 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.2734375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3984375, |
| "entropy": 0.3317708671092987, |
| "epoch": 0.5344827586206896, |
| "grad_norm": 0.00515081686899066, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "step": 186 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.28125, |
| "clip_ratio/low_mean": 0.1328125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.4140625, |
| "entropy": 0.3512876033782959, |
| "epoch": 0.5373563218390804, |
| "grad_norm": 0.0033993138931691647, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 187 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.21875, |
| "clip_ratio/low_mean": 0.171875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.390625, |
| "entropy": 0.358001247048378, |
| "epoch": 0.5402298850574713, |
| "grad_norm": 0.0027523390017449856, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 188 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 84.0, |
| "completions/max_terminated_length": 84.0, |
| "completions/mean_length": 53.28125, |
| "completions/mean_terminated_length": 53.28125, |
| "completions/min_length": 33.0, |
| "completions/min_terminated_length": 33.0, |
| "entropy": 0.37513381242752075, |
| "epoch": 0.5431034482758621, |
| "frac_reward_zero_std": 0.03125, |
| "grad_norm": 0.003202933119609952, |
| "learning_rate": 1e-05, |
| "loss": 0.0005, |
| "num_tokens": 21661603.0, |
| "reward": 0.660548985004425, |
| "reward_std": 0.18187561631202698, |
| "rewards/ngram_repetition2/mean": -0.011021820828318596, |
| "rewards/ngram_repetition2/std": 0.03202351555228233, |
| "rewards/ngram_repetition3/mean": -0.018067045137286186, |
| "rewards/ngram_repetition3/std": 0.03626594319939613, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.587890625, |
| "rewards/symbolic_reward_accuracy/std": 0.49269601702690125, |
| "rewards/symbolic_reward_partial_score/mean": 0.8310546875, |
| "rewards/symbolic_reward_partial_score/std": 0.25592610239982605, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1602153778076172, |
| "sampling/importance_sampling_ratio/min": 0.003239927114918828, |
| "sampling/sampling_logp_difference/max": 5.732204437255859, |
| "sampling/sampling_logp_difference/mean": 0.19722561538219452, |
| "step": 189 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.3125, |
| "clip_ratio/low_mean": 0.125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.4375, |
| "entropy": 0.37337982654571533, |
| "epoch": 0.5459770114942529, |
| "grad_norm": 0.0023845031391829252, |
| "learning_rate": 1e-05, |
| "loss": -0.0005, |
| "step": 190 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.171875, |
| "clip_ratio/low_mean": 0.2421875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.4140625, |
| "entropy": 0.3779737800359726, |
| "epoch": 0.5488505747126436, |
| "grad_norm": 0.002789959777146578, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 191 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.21875, |
| "clip_ratio/low_mean": 0.140625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.359375, |
| "entropy": 0.38622401654720306, |
| "epoch": 0.5517241379310345, |
| "grad_norm": 0.003167940303683281, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 192 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 77.0, |
| "completions/max_terminated_length": 77.0, |
| "completions/mean_length": 51.712890625, |
| "completions/mean_terminated_length": 51.712890625, |
| "completions/min_length": 31.0, |
| "completions/min_terminated_length": 31.0, |
| "entropy": 0.36182406544685364, |
| "epoch": 0.5545977011494253, |
| "frac_reward_zero_std": 0.0625, |
| "grad_norm": 0.00419881846755743, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "num_tokens": 22103056.0, |
| "reward": 0.6468621492385864, |
| "reward_std": 0.18368908762931824, |
| "rewards/ngram_repetition2/mean": -0.008758383803069592, |
| "rewards/ngram_repetition2/std": 0.02697998657822609, |
| "rewards/ngram_repetition3/mean": -0.016943732276558876, |
| "rewards/ngram_repetition3/std": 0.03183072432875633, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.56640625, |
| "rewards/symbolic_reward_accuracy/std": 0.4960552453994751, |
| "rewards/symbolic_reward_partial_score/mean": 0.83544921875, |
| "rewards/symbolic_reward_partial_score/std": 0.24576953053474426, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1562879085540771, |
| "sampling/importance_sampling_ratio/min": 0.004593458957970142, |
| "sampling/sampling_logp_difference/max": 5.383121967315674, |
| "sampling/sampling_logp_difference/mean": 0.1905713975429535, |
| "step": 193 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1875, |
| "clip_ratio/low_mean": 0.2578125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.4453125, |
| "entropy": 0.3760426491498947, |
| "epoch": 0.5574712643678161, |
| "grad_norm": 0.002429383806884289, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 194 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2265625, |
| "clip_ratio/low_mean": 0.25, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.4765625, |
| "entropy": 0.36310867965221405, |
| "epoch": 0.5603448275862069, |
| "grad_norm": 0.0015612218994647264, |
| "learning_rate": 1e-05, |
| "loss": -0.0004, |
| "step": 195 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.28125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.4609375, |
| "entropy": 0.3646097779273987, |
| "epoch": 0.5632183908045977, |
| "grad_norm": 0.0017141635762527585, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 196 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 79.0, |
| "completions/max_terminated_length": 79.0, |
| "completions/mean_length": 49.515625, |
| "completions/mean_terminated_length": 49.515625, |
| "completions/min_length": 33.0, |
| "completions/min_terminated_length": 33.0, |
| "entropy": 0.3320048302412033, |
| "epoch": 0.5660919540229885, |
| "frac_reward_zero_std": 0.03125, |
| "grad_norm": 0.0032437844201922417, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "num_tokens": 22549560.0, |
| "reward": 0.7433836460113525, |
| "reward_std": 0.15558896958827972, |
| "rewards/ngram_repetition2/mean": -0.0033277005422860384, |
| "rewards/ngram_repetition2/std": 0.01725313626229763, |
| "rewards/ngram_repetition3/mean": -0.004009343683719635, |
| "rewards/ngram_repetition3/std": 0.01900562271475792, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.689453125, |
| "rewards/symbolic_reward_accuracy/std": 0.46317005157470703, |
| "rewards/symbolic_reward_partial_score/mean": 0.8694661259651184, |
| "rewards/symbolic_reward_partial_score/std": 0.24413488805294037, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1475906372070312, |
| "sampling/importance_sampling_ratio/min": 0.003707862924784422, |
| "sampling/sampling_logp_difference/max": 5.597299575805664, |
| "sampling/sampling_logp_difference/mean": 0.16953125596046448, |
| "step": 197 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.09375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2734375, |
| "entropy": 0.3272063732147217, |
| "epoch": 0.5689655172413793, |
| "grad_norm": 0.0022684920113533735, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 198 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2421875, |
| "clip_ratio/low_mean": 0.1484375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.390625, |
| "entropy": 0.3217940479516983, |
| "epoch": 0.5718390804597702, |
| "grad_norm": 0.0012801972916349769, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 199 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.234375, |
| "clip_ratio/low_mean": 0.109375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.34375, |
| "entropy": 0.34869106113910675, |
| "epoch": 0.5747126436781609, |
| "grad_norm": 0.0017063482664525509, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 200 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 74.0, |
| "completions/max_terminated_length": 74.0, |
| "completions/mean_length": 50.125, |
| "completions/mean_terminated_length": 50.125, |
| "completions/min_length": 19.0, |
| "completions/min_terminated_length": 19.0, |
| "entropy": 0.3502783924341202, |
| "epoch": 0.5775862068965517, |
| "frac_reward_zero_std": 0.0625, |
| "grad_norm": 0.0024014730006456375, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "num_tokens": 23005560.0, |
| "reward": 0.6850454807281494, |
| "reward_std": 0.1775665283203125, |
| "rewards/ngram_repetition2/mean": -0.0029550609178841114, |
| "rewards/ngram_repetition2/std": 0.015238610096275806, |
| "rewards/ngram_repetition3/mean": -0.0032362965866923332, |
| "rewards/ngram_repetition3/std": 0.016220975667238235, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.607421875, |
| "rewards/symbolic_reward_accuracy/std": 0.4888018071651459, |
| "rewards/symbolic_reward_partial_score/mean": 0.8663736581802368, |
| "rewards/symbolic_reward_partial_score/std": 0.2243141382932663, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1440577507019043, |
| "sampling/importance_sampling_ratio/min": 0.0059717451222240925, |
| "sampling/sampling_logp_difference/max": 5.120716094970703, |
| "sampling/sampling_logp_difference/mean": 0.1660330891609192, |
| "step": 201 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.09375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2734375, |
| "entropy": 0.3343355357646942, |
| "epoch": 0.5804597701149425, |
| "grad_norm": 0.0018374222563579679, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 202 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.171875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3515625, |
| "entropy": 0.3133174031972885, |
| "epoch": 0.5833333333333334, |
| "grad_norm": 0.0014621627051383257, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "step": 203 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2109375, |
| "clip_ratio/low_mean": 0.1328125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.34375, |
| "entropy": 0.32208897173404694, |
| "epoch": 0.5862068965517241, |
| "grad_norm": 0.0016267385799437761, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 204 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 75.0, |
| "completions/max_terminated_length": 75.0, |
| "completions/mean_length": 49.19921875, |
| "completions/mean_terminated_length": 49.19921875, |
| "completions/min_length": 34.0, |
| "completions/min_terminated_length": 34.0, |
| "entropy": 0.34746497869491577, |
| "epoch": 0.5890804597701149, |
| "frac_reward_zero_std": 0.1875, |
| "grad_norm": 0.0025201276876032352, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "num_tokens": 23451742.0, |
| "reward": 0.7662777900695801, |
| "reward_std": 0.15987689793109894, |
| "rewards/ngram_repetition2/mean": -0.0014059185050427914, |
| "rewards/ngram_repetition2/std": 0.009861784987151623, |
| "rewards/ngram_repetition3/mean": -0.001674062223173678, |
| "rewards/ngram_repetition3/std": 0.012159745208919048, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.70703125, |
| "rewards/symbolic_reward_accuracy/std": 0.455569326877594, |
| "rewards/symbolic_reward_partial_score/mean": 0.9046223759651184, |
| "rewards/symbolic_reward_partial_score/std": 0.21026679873466492, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1511237621307373, |
| "sampling/importance_sampling_ratio/min": 0.012976454570889473, |
| "sampling/sampling_logp_difference/max": 4.344618797302246, |
| "sampling/sampling_logp_difference/mean": 0.1697186529636383, |
| "step": 205 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1953125, |
| "entropy": 0.318589448928833, |
| "epoch": 0.5919540229885057, |
| "grad_norm": 0.0021791488397866488, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 206 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1484375, |
| "clip_ratio/low_mean": 0.109375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.35446639358997345, |
| "epoch": 0.5948275862068966, |
| "grad_norm": 0.002254737773910165, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 207 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2265625, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2890625, |
| "entropy": 0.3369624614715576, |
| "epoch": 0.5977011494252874, |
| "grad_norm": 0.0016745221801102161, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 208 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 83.0, |
| "completions/max_terminated_length": 83.0, |
| "completions/mean_length": 49.271484375, |
| "completions/mean_terminated_length": 49.271484375, |
| "completions/min_length": 35.0, |
| "completions/min_terminated_length": 35.0, |
| "entropy": 0.33122432231903076, |
| "epoch": 0.6005747126436781, |
| "frac_reward_zero_std": 0.0625, |
| "grad_norm": 0.0019423977937549353, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "num_tokens": 23888617.0, |
| "reward": 0.6938353776931763, |
| "reward_std": 0.15832862257957458, |
| "rewards/ngram_repetition2/mean": -0.003065573051571846, |
| "rewards/ngram_repetition2/std": 0.01647561974823475, |
| "rewards/ngram_repetition3/mean": -0.0030463775619864464, |
| "rewards/ngram_repetition3/std": 0.016763268038630486, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.625, |
| "rewards/symbolic_reward_accuracy/std": 0.4845963716506958, |
| "rewards/symbolic_reward_partial_score/mean": 0.8546549081802368, |
| "rewards/symbolic_reward_partial_score/std": 0.24149714410305023, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1502861976623535, |
| "sampling/importance_sampling_ratio/min": 0.010833137668669224, |
| "sampling/sampling_logp_difference/max": 4.525145530700684, |
| "sampling/sampling_logp_difference/mean": 0.17161469161510468, |
| "step": 209 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.234375, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3203125, |
| "entropy": 0.3186955451965332, |
| "epoch": 0.603448275862069, |
| "grad_norm": 0.0018531373934820294, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "step": 210 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.2109375, |
| "clip_ratio/low_mean": 0.1015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3125, |
| "entropy": 0.31654396653175354, |
| "epoch": 0.6063218390804598, |
| "grad_norm": 0.0016878793248906732, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 211 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.234375, |
| "clip_ratio/low_mean": 0.1171875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3515625, |
| "entropy": 0.33084215223789215, |
| "epoch": 0.6091954022988506, |
| "grad_norm": 0.00255804555490613, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 212 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 107.0, |
| "completions/max_terminated_length": 107.0, |
| "completions/mean_length": 50.869140625, |
| "completions/mean_terminated_length": 50.869140625, |
| "completions/min_length": 33.0, |
| "completions/min_terminated_length": 33.0, |
| "entropy": 0.35066109895706177, |
| "epoch": 0.6120689655172413, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.004320996347814798, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "num_tokens": 24329318.0, |
| "reward": 0.8296270966529846, |
| "reward_std": 0.12831971049308777, |
| "rewards/ngram_repetition2/mean": -0.004883305169641972, |
| "rewards/ngram_repetition2/std": 0.02222960814833641, |
| "rewards/ngram_repetition3/mean": -0.0060415808111429214, |
| "rewards/ngram_repetition3/std": 0.02252221666276455, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.787109375, |
| "rewards/symbolic_reward_accuracy/std": 0.409751296043396, |
| "rewards/symbolic_reward_partial_score/mean": 0.92919921875, |
| "rewards/symbolic_reward_partial_score/std": 0.1828947216272354, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1664955615997314, |
| "sampling/importance_sampling_ratio/min": 0.010721365921199322, |
| "sampling/sampling_logp_difference/max": 4.535516738891602, |
| "sampling/sampling_logp_difference/mean": 0.17997923493385315, |
| "step": 213 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.2734375, |
| "clip_ratio/low_mean": 0.1015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.375, |
| "entropy": 0.3535380959510803, |
| "epoch": 0.6149425287356322, |
| "grad_norm": 0.0023528970777988434, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 214 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2578125, |
| "clip_ratio/low_mean": 0.1484375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.40625, |
| "entropy": 0.36174456775188446, |
| "epoch": 0.617816091954023, |
| "grad_norm": 0.0027345679700374603, |
| "learning_rate": 1e-05, |
| "loss": 0.0007, |
| "step": 215 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.3671875, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.421875, |
| "entropy": 0.3348846435546875, |
| "epoch": 0.6206896551724138, |
| "grad_norm": 0.0029091956093907356, |
| "learning_rate": 1e-05, |
| "loss": -0.0006, |
| "step": 216 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 75.0, |
| "completions/max_terminated_length": 75.0, |
| "completions/mean_length": 48.234375, |
| "completions/mean_terminated_length": 48.234375, |
| "completions/min_length": 33.0, |
| "completions/min_terminated_length": 33.0, |
| "entropy": 0.3045446276664734, |
| "epoch": 0.6235632183908046, |
| "frac_reward_zero_std": 0.1875, |
| "grad_norm": 0.0026498916558921337, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "num_tokens": 24790206.0, |
| "reward": 0.7498259544372559, |
| "reward_std": 0.17582294344902039, |
| "rewards/ngram_repetition2/mean": -0.001370269455946982, |
| "rewards/ngram_repetition2/std": 0.00850055180490017, |
| "rewards/ngram_repetition3/mean": -0.0013840529136359692, |
| "rewards/ngram_repetition3/std": 0.008392706513404846, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.689453125, |
| "rewards/symbolic_reward_accuracy/std": 0.46317005157470703, |
| "rewards/symbolic_reward_partial_score/mean": 0.8907877206802368, |
| "rewards/symbolic_reward_partial_score/std": 0.21418313682079315, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1454708576202393, |
| "sampling/importance_sampling_ratio/min": 0.005403801798820496, |
| "sampling/sampling_logp_difference/max": 5.2206525802612305, |
| "sampling/sampling_logp_difference/mean": 0.16215276718139648, |
| "step": 217 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.25, |
| "clip_ratio/low_mean": 0.09375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.34375, |
| "entropy": 0.31783026456832886, |
| "epoch": 0.6264367816091954, |
| "grad_norm": 0.0027007993776351213, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "step": 218 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.21875, |
| "clip_ratio/low_mean": 0.1484375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3671875, |
| "entropy": 0.2987368553876877, |
| "epoch": 0.6293103448275862, |
| "grad_norm": 0.0020821229554712772, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "step": 219 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.21875, |
| "clip_ratio/low_mean": 0.1171875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3359375, |
| "entropy": 0.31727874279022217, |
| "epoch": 0.632183908045977, |
| "grad_norm": 0.0024530631490051746, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 220 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 73.0, |
| "completions/max_terminated_length": 73.0, |
| "completions/mean_length": 48.341796875, |
| "completions/mean_terminated_length": 48.341796875, |
| "completions/min_length": 33.0, |
| "completions/min_terminated_length": 33.0, |
| "entropy": 0.29757460951805115, |
| "epoch": 0.6350574712643678, |
| "frac_reward_zero_std": 0.21875, |
| "grad_norm": 0.002699502045288682, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "num_tokens": 25245613.0, |
| "reward": 0.7274583578109741, |
| "reward_std": 0.12696774303913116, |
| "rewards/ngram_repetition2/mean": -0.0015846589813008904, |
| "rewards/ngram_repetition2/std": 0.010053995065391064, |
| "rewards/ngram_repetition3/mean": -0.0016071019927039742, |
| "rewards/ngram_repetition3/std": 0.009995999746024609, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.66015625, |
| "rewards/symbolic_reward_accuracy/std": 0.4741191864013672, |
| "rewards/symbolic_reward_partial_score/mean": 0.8846029043197632, |
| "rewards/symbolic_reward_partial_score/std": 0.21321582794189453, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1418908834457397, |
| "sampling/importance_sampling_ratio/min": 0.003111095167696476, |
| "sampling/sampling_logp_difference/max": 5.772780418395996, |
| "sampling/sampling_logp_difference/mean": 0.15935096144676208, |
| "step": 221 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1484375, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2109375, |
| "entropy": 0.3056052029132843, |
| "epoch": 0.6379310344827587, |
| "grad_norm": 0.002731436863541603, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 222 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1640625, |
| "clip_ratio/low_mean": 0.140625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3046875, |
| "entropy": 0.2877749502658844, |
| "epoch": 0.6408045977011494, |
| "grad_norm": 0.0023609516210854053, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 223 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.2109375, |
| "clip_ratio/low_mean": 0.1015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3125, |
| "entropy": 0.2868718057870865, |
| "epoch": 0.6436781609195402, |
| "grad_norm": 0.002323941560462117, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 224 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 74.0, |
| "completions/max_terminated_length": 74.0, |
| "completions/mean_length": 47.828125, |
| "completions/mean_terminated_length": 47.828125, |
| "completions/min_length": 34.0, |
| "completions/min_terminated_length": 34.0, |
| "entropy": 0.3106960952281952, |
| "epoch": 0.646551724137931, |
| "frac_reward_zero_std": 0.28125, |
| "grad_norm": 0.003546177176758647, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "num_tokens": 25691093.0, |
| "reward": 0.7983278036117554, |
| "reward_std": 0.13130336999893188, |
| "rewards/ngram_repetition2/mean": -0.000689077889546752, |
| "rewards/ngram_repetition2/std": 0.006285086274147034, |
| "rewards/ngram_repetition3/mean": -0.0005111521459184587, |
| "rewards/ngram_repetition3/std": 0.004265496972948313, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.744140625, |
| "rewards/symbolic_reward_accuracy/std": 0.43676990270614624, |
| "rewards/symbolic_reward_partial_score/mean": 0.9248046875, |
| "rewards/symbolic_reward_partial_score/std": 0.16897927224636078, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1400320529937744, |
| "sampling/importance_sampling_ratio/min": 0.0077552772127091885, |
| "sampling/sampling_logp_difference/max": 4.859381675720215, |
| "sampling/sampling_logp_difference/mean": 0.1513688564300537, |
| "step": 225 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.25, |
| "entropy": 0.30004067718982697, |
| "epoch": 0.6494252873563219, |
| "grad_norm": 0.0019598742946982384, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "step": 226 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1875, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.29398113489151, |
| "epoch": 0.6522988505747126, |
| "grad_norm": 0.0021716589108109474, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 227 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1640625, |
| "clip_ratio/low_mean": 0.078125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2421875, |
| "entropy": 0.2966929078102112, |
| "epoch": 0.6551724137931034, |
| "grad_norm": 0.0018855092348530889, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 228 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 70.0, |
| "completions/max_terminated_length": 70.0, |
| "completions/mean_length": 47.287109375, |
| "completions/mean_terminated_length": 47.287109375, |
| "completions/min_length": 33.0, |
| "completions/min_terminated_length": 33.0, |
| "entropy": 0.3022087961435318, |
| "epoch": 0.6580459770114943, |
| "frac_reward_zero_std": 0.40625, |
| "grad_norm": 0.0015829600160941482, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "num_tokens": 26148808.0, |
| "reward": 0.8019521832466125, |
| "reward_std": 0.11994894593954086, |
| "rewards/ngram_repetition2/mean": -3.474125696811825e-05, |
| "rewards/ngram_repetition2/std": 0.0006160694174468517, |
| "rewards/ngram_repetition3/mean": -5.918560782447457e-05, |
| "rewards/ngram_repetition3/std": 0.0013392174150794744, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.75, |
| "rewards/symbolic_reward_accuracy/std": 0.43343618512153625, |
| "rewards/symbolic_reward_partial_score/mean": 0.9231770634651184, |
| "rewards/symbolic_reward_partial_score/std": 0.19579437375068665, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1348984241485596, |
| "sampling/importance_sampling_ratio/min": 0.009495556354522705, |
| "sampling/sampling_logp_difference/max": 4.656931400299072, |
| "sampling/sampling_logp_difference/mean": 0.14863896369934082, |
| "step": 229 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1953125, |
| "clip_ratio/low_mean": 0.078125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2734375, |
| "entropy": 0.28135547041893005, |
| "epoch": 0.6609195402298851, |
| "grad_norm": 0.0008100973791442811, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 230 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.15625, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2109375, |
| "entropy": 0.28573939204216003, |
| "epoch": 0.6637931034482759, |
| "grad_norm": 0.0018312670290470123, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 231 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.109375, |
| "clip_ratio/low_mean": 0.078125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1875, |
| "entropy": 0.2705778628587723, |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.002459563547745347, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 232 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 71.0, |
| "completions/max_terminated_length": 71.0, |
| "completions/mean_length": 48.6171875, |
| "completions/mean_terminated_length": 48.6171875, |
| "completions/min_length": 34.0, |
| "completions/min_terminated_length": 34.0, |
| "entropy": 0.30483564734458923, |
| "epoch": 0.6695402298850575, |
| "frac_reward_zero_std": 0.375, |
| "grad_norm": 0.0017376919277012348, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "num_tokens": 26573476.0, |
| "reward": 0.7558529376983643, |
| "reward_std": 0.10200260579586029, |
| "rewards/ngram_repetition2/mean": -0.0003638204070739448, |
| "rewards/ngram_repetition2/std": 0.004353736061602831, |
| "rewards/ngram_repetition3/mean": -0.00028086977545171976, |
| "rewards/ngram_repetition3/std": 0.005314893089234829, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.69140625, |
| "rewards/symbolic_reward_accuracy/std": 0.4623647928237915, |
| "rewards/symbolic_reward_partial_score/mean": 0.90625, |
| "rewards/symbolic_reward_partial_score/std": 0.1864483654499054, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1364421844482422, |
| "sampling/importance_sampling_ratio/min": 0.006919211242347956, |
| "sampling/sampling_logp_difference/max": 4.973453521728516, |
| "sampling/sampling_logp_difference/mean": 0.15489694476127625, |
| "step": 233 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.09375, |
| "clip_ratio/low_mean": 0.1015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1953125, |
| "entropy": 0.29285167157649994, |
| "epoch": 0.6724137931034483, |
| "grad_norm": 0.0008845807751640677, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 234 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.0859375, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.15625, |
| "entropy": 0.2890046238899231, |
| "epoch": 0.6752873563218391, |
| "grad_norm": 0.002121682045981288, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 235 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1171875, |
| "clip_ratio/low_mean": 0.09375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2109375, |
| "entropy": 0.2796691060066223, |
| "epoch": 0.6781609195402298, |
| "grad_norm": 0.0009406171157024801, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 236 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 79.0, |
| "completions/max_terminated_length": 79.0, |
| "completions/mean_length": 52.1953125, |
| "completions/mean_terminated_length": 52.1953125, |
| "completions/min_length": 39.0, |
| "completions/min_terminated_length": 39.0, |
| "entropy": 0.29087191820144653, |
| "epoch": 0.6810344827586207, |
| "frac_reward_zero_std": 0.3125, |
| "grad_norm": 0.0022152920719236135, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "num_tokens": 27027368.0, |
| "reward": 0.7920734286308289, |
| "reward_std": 0.08740311861038208, |
| "rewards/ngram_repetition2/mean": -0.0009828612674027681, |
| "rewards/ngram_repetition2/std": 0.0061480761505663395, |
| "rewards/ngram_repetition3/mean": -0.0006573445862159133, |
| "rewards/ngram_repetition3/std": 0.004901508800685406, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.751953125, |
| "rewards/symbolic_reward_accuracy/std": 0.4323015511035919, |
| "rewards/symbolic_reward_partial_score/mean": 0.8857421875, |
| "rewards/symbolic_reward_partial_score/std": 0.24903303384780884, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1425862312316895, |
| "sampling/importance_sampling_ratio/min": 0.0047275903634727, |
| "sampling/sampling_logp_difference/max": 5.354339599609375, |
| "sampling/sampling_logp_difference/mean": 0.15625977516174316, |
| "step": 237 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1484375, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1953125, |
| "entropy": 0.2970282882452011, |
| "epoch": 0.6839080459770115, |
| "grad_norm": 0.0018691563745960593, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 238 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1640625, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.21875, |
| "entropy": 0.3026685267686844, |
| "epoch": 0.6867816091954023, |
| "grad_norm": 0.0018776139477267861, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 239 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.15625, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1875, |
| "entropy": 0.2863199859857559, |
| "epoch": 0.6896551724137931, |
| "grad_norm": 0.001305580255575478, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 240 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 79.0, |
| "completions/max_terminated_length": 79.0, |
| "completions/mean_length": 50.650390625, |
| "completions/mean_terminated_length": 50.650390625, |
| "completions/min_length": 32.0, |
| "completions/min_terminated_length": 32.0, |
| "entropy": 0.2873764932155609, |
| "epoch": 0.6925287356321839, |
| "frac_reward_zero_std": 0.21875, |
| "grad_norm": 0.002372318645939231, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "num_tokens": 27474613.0, |
| "reward": 0.7715722322463989, |
| "reward_std": 0.11197517067193985, |
| "rewards/ngram_repetition2/mean": -0.0005445921560749412, |
| "rewards/ngram_repetition2/std": 0.004746263846755028, |
| "rewards/ngram_repetition3/mean": -0.0004385022330097854, |
| "rewards/ngram_repetition3/std": 0.005005388054996729, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.72265625, |
| "rewards/symbolic_reward_accuracy/std": 0.4481254518032074, |
| "rewards/symbolic_reward_partial_score/mean": 0.8857421875, |
| "rewards/symbolic_reward_partial_score/std": 0.23940622806549072, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1354875564575195, |
| "sampling/importance_sampling_ratio/min": 0.01270719151943922, |
| "sampling/sampling_logp_difference/max": 4.36558723449707, |
| "sampling/sampling_logp_difference/mean": 0.1487276554107666, |
| "step": 241 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.21875, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2734375, |
| "entropy": 0.285337433218956, |
| "epoch": 0.6954022988505747, |
| "grad_norm": 0.001999761676415801, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 242 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2109375, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.28375518321990967, |
| "epoch": 0.6982758620689655, |
| "grad_norm": 0.0021911542862653732, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 243 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1875, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.25, |
| "entropy": 0.29059699177742004, |
| "epoch": 0.7011494252873564, |
| "grad_norm": 0.001007542130537331, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 244 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 74.0, |
| "completions/max_terminated_length": 74.0, |
| "completions/mean_length": 50.28125, |
| "completions/mean_terminated_length": 50.28125, |
| "completions/min_length": 32.0, |
| "completions/min_terminated_length": 32.0, |
| "entropy": 0.2930067479610443, |
| "epoch": 0.7040229885057471, |
| "frac_reward_zero_std": 0.34375, |
| "grad_norm": 0.0017507770098745823, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "num_tokens": 27893637.0, |
| "reward": 0.8494104146957397, |
| "reward_std": 0.09027449786663055, |
| "rewards/ngram_repetition2/mean": -7.109075522748753e-05, |
| "rewards/ngram_repetition2/std": 0.00128466309979558, |
| "rewards/ngram_repetition3/mean": -0.00029177218675613403, |
| "rewards/ngram_repetition3/std": 0.0030360899399966, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.81640625, |
| "rewards/symbolic_reward_accuracy/std": 0.3875311613082886, |
| "rewards/symbolic_reward_partial_score/mean": 0.9264322519302368, |
| "rewards/symbolic_reward_partial_score/std": 0.20362317562103271, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1433947086334229, |
| "sampling/importance_sampling_ratio/min": 0.0034871636889874935, |
| "sampling/sampling_logp_difference/max": 5.658666610717773, |
| "sampling/sampling_logp_difference/mean": 0.1622186154127121, |
| "step": 245 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.09375, |
| "clip_ratio/low_mean": 0.015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.109375, |
| "entropy": 0.3093741685152054, |
| "epoch": 0.7068965517241379, |
| "grad_norm": 0.0025900655891746283, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 246 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1640625, |
| "entropy": 0.303500235080719, |
| "epoch": 0.7097701149425287, |
| "grad_norm": 0.0009350689360871911, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 247 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1328125, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1875, |
| "entropy": 0.31663645803928375, |
| "epoch": 0.7126436781609196, |
| "grad_norm": 0.0009929202497005463, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 248 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 77.0, |
| "completions/max_terminated_length": 77.0, |
| "completions/mean_length": 53.07421875, |
| "completions/mean_terminated_length": 53.07421875, |
| "completions/min_length": 36.0, |
| "completions/min_terminated_length": 36.0, |
| "entropy": 0.30330008268356323, |
| "epoch": 0.7155172413793104, |
| "frac_reward_zero_std": 0.125, |
| "grad_norm": 0.0020502391271293163, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "num_tokens": 28344811.0, |
| "reward": 0.746130108833313, |
| "reward_std": 0.10843535512685776, |
| "rewards/ngram_repetition2/mean": -0.0006376801757141948, |
| "rewards/ngram_repetition2/std": 0.004766174126416445, |
| "rewards/ngram_repetition3/mean": -0.0006113115232437849, |
| "rewards/ngram_repetition3/std": 0.005070660263299942, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.68359375, |
| "rewards/symbolic_reward_accuracy/std": 0.46552830934524536, |
| "rewards/symbolic_reward_partial_score/mean": 0.89208984375, |
| "rewards/symbolic_reward_partial_score/std": 0.22636321187019348, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1441969871520996, |
| "sampling/importance_sampling_ratio/min": 0.009571997448801994, |
| "sampling/sampling_logp_difference/max": 4.648913383483887, |
| "sampling/sampling_logp_difference/mean": 0.16198162734508514, |
| "step": 249 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1875, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2421875, |
| "entropy": 0.309006005525589, |
| "epoch": 0.7183908045977011, |
| "grad_norm": 0.0022520306520164013, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "step": 250 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.234375, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2734375, |
| "entropy": 0.3168264776468277, |
| "epoch": 0.7212643678160919, |
| "grad_norm": 0.0034188460558652878, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 251 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.171875, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2265625, |
| "entropy": 0.30942320823669434, |
| "epoch": 0.7241379310344828, |
| "grad_norm": 0.002158515155315399, |
| "learning_rate": 1e-05, |
| "loss": -0.0004, |
| "step": 252 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 82.0, |
| "completions/max_terminated_length": 82.0, |
| "completions/mean_length": 55.02734375, |
| "completions/mean_terminated_length": 55.02734375, |
| "completions/min_length": 34.0, |
| "completions/min_terminated_length": 34.0, |
| "entropy": 0.310179203748703, |
| "epoch": 0.7270114942528736, |
| "frac_reward_zero_std": 0.0625, |
| "grad_norm": 0.0023198144044727087, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "num_tokens": 28806489.0, |
| "reward": 0.7360048294067383, |
| "reward_std": 0.11768031865358353, |
| "rewards/ngram_repetition2/mean": -0.001742619788274169, |
| "rewards/ngram_repetition2/std": 0.008961454033851624, |
| "rewards/ngram_repetition3/mean": -0.0012919665314257145, |
| "rewards/ngram_repetition3/std": 0.006052871700376272, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.673828125, |
| "rewards/symbolic_reward_accuracy/std": 0.4692695140838623, |
| "rewards/symbolic_reward_partial_score/mean": 0.8811848759651184, |
| "rewards/symbolic_reward_partial_score/std": 0.22542209923267365, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1476696729660034, |
| "sampling/importance_sampling_ratio/min": 0.006753196474164724, |
| "sampling/sampling_logp_difference/max": 4.997739315032959, |
| "sampling/sampling_logp_difference/mean": 0.16210368275642395, |
| "step": 253 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1953125, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.32601065933704376, |
| "epoch": 0.7298850574712644, |
| "grad_norm": 0.0027695144526660442, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 254 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.203125, |
| "clip_ratio/low_mean": 0.1015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3046875, |
| "entropy": 0.3259432166814804, |
| "epoch": 0.7327586206896551, |
| "grad_norm": 0.0019364689942449331, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 255 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.21875, |
| "clip_ratio/low_mean": 0.1484375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3671875, |
| "entropy": 0.30629634857177734, |
| "epoch": 0.735632183908046, |
| "grad_norm": 0.0017040437087416649, |
| "learning_rate": 1e-05, |
| "loss": 0.0005, |
| "step": 256 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 76.0, |
| "completions/max_terminated_length": 76.0, |
| "completions/mean_length": 53.3359375, |
| "completions/mean_terminated_length": 53.3359375, |
| "completions/min_length": 38.0, |
| "completions/min_terminated_length": 38.0, |
| "entropy": 0.31905922293663025, |
| "epoch": 0.7385057471264368, |
| "frac_reward_zero_std": 0.25, |
| "grad_norm": 0.0018301898380741477, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "num_tokens": 29245925.0, |
| "reward": 0.8677599430084229, |
| "reward_std": 0.13781705498695374, |
| "rewards/ngram_repetition2/mean": -0.0006793971406295896, |
| "rewards/ngram_repetition2/std": 0.007922603748738766, |
| "rewards/ngram_repetition3/mean": -0.0006691771559417248, |
| "rewards/ngram_repetition3/std": 0.006458070129156113, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.83203125, |
| "rewards/symbolic_reward_accuracy/std": 0.374204158782959, |
| "rewards/symbolic_reward_partial_score/mean": 0.951171875, |
| "rewards/symbolic_reward_partial_score/std": 0.1681368052959442, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1527073383331299, |
| "sampling/importance_sampling_ratio/min": 0.004750695079565048, |
| "sampling/sampling_logp_difference/max": 5.349464416503906, |
| "sampling/sampling_logp_difference/mean": 0.17114776372909546, |
| "step": 257 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1875, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2421875, |
| "entropy": 0.30993255972862244, |
| "epoch": 0.7413793103448276, |
| "grad_norm": 0.002674890449270606, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "step": 258 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1328125, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.21875, |
| "entropy": 0.3146713227033615, |
| "epoch": 0.7442528735632183, |
| "grad_norm": 0.0010953254532068968, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 259 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.171875, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2421875, |
| "entropy": 0.30896949768066406, |
| "epoch": 0.7471264367816092, |
| "grad_norm": 0.0015775602078065276, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 260 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 80.0, |
| "completions/max_terminated_length": 80.0, |
| "completions/mean_length": 51.8671875, |
| "completions/mean_terminated_length": 51.8671875, |
| "completions/min_length": 35.0, |
| "completions/min_terminated_length": 35.0, |
| "entropy": 0.3116368055343628, |
| "epoch": 0.75, |
| "frac_reward_zero_std": 0.15625, |
| "grad_norm": 0.002235675696283579, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "num_tokens": 29702817.0, |
| "reward": 0.8898277282714844, |
| "reward_std": 0.11886290460824966, |
| "rewards/ngram_repetition2/mean": -0.0007994142360985279, |
| "rewards/ngram_repetition2/std": 0.006934627424925566, |
| "rewards/ngram_repetition3/mean": -0.0007997690699994564, |
| "rewards/ngram_repetition3/std": 0.005978343077003956, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.8671875, |
| "rewards/symbolic_reward_accuracy/std": 0.33970388770103455, |
| "rewards/symbolic_reward_partial_score/mean": 0.9427083730697632, |
| "rewards/symbolic_reward_partial_score/std": 0.1916244775056839, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.151458740234375, |
| "sampling/importance_sampling_ratio/min": 0.005481026601046324, |
| "sampling/sampling_logp_difference/max": 5.206462860107422, |
| "sampling/sampling_logp_difference/mean": 0.17337773740291595, |
| "step": 261 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2109375, |
| "clip_ratio/low_mean": 0.1015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3125, |
| "entropy": 0.308078333735466, |
| "epoch": 0.7528735632183908, |
| "grad_norm": 0.0009869755012914538, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 262 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.203125, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.30821652710437775, |
| "epoch": 0.7557471264367817, |
| "grad_norm": 0.0018410662887617946, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 263 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.109375, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.15625, |
| "entropy": 0.31607021391391754, |
| "epoch": 0.7586206896551724, |
| "grad_norm": 0.001890576328150928, |
| "learning_rate": 1e-05, |
| "loss": -0.0004, |
| "step": 264 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 74.0, |
| "completions/max_terminated_length": 74.0, |
| "completions/mean_length": 53.72265625, |
| "completions/mean_terminated_length": 53.72265625, |
| "completions/min_length": 35.0, |
| "completions/min_terminated_length": 35.0, |
| "entropy": 0.32434844970703125, |
| "epoch": 0.7614942528735632, |
| "frac_reward_zero_std": 0.15625, |
| "grad_norm": 0.005114416126161814, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "num_tokens": 30135635.0, |
| "reward": 0.8375043272972107, |
| "reward_std": 0.11283920705318451, |
| "rewards/ngram_repetition2/mean": -0.002288718707859516, |
| "rewards/ngram_repetition2/std": 0.011774125508964062, |
| "rewards/ngram_repetition3/mean": -0.002161826938390732, |
| "rewards/ngram_repetition3/std": 0.010816823691129684, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.79296875, |
| "rewards/symbolic_reward_accuracy/std": 0.40557438135147095, |
| "rewards/symbolic_reward_partial_score/mean": 0.9415690302848816, |
| "rewards/symbolic_reward_partial_score/std": 0.16040416061878204, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.159005880355835, |
| "sampling/importance_sampling_ratio/min": 0.0024109813384711742, |
| "sampling/sampling_logp_difference/max": 6.027721405029297, |
| "sampling/sampling_logp_difference/mean": 0.17788560688495636, |
| "step": 265 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.203125, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.25, |
| "entropy": 0.32901355624198914, |
| "epoch": 0.764367816091954, |
| "grad_norm": 0.0023172965738922358, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 266 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1953125, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2265625, |
| "entropy": 0.31494955718517303, |
| "epoch": 0.7672413793103449, |
| "grad_norm": 0.0014608411584049463, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 267 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.203125, |
| "clip_ratio/low_mean": 0.0703125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2734375, |
| "entropy": 0.3259265422821045, |
| "epoch": 0.7701149425287356, |
| "grad_norm": 0.0023232330568134785, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 268 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 77.0, |
| "completions/max_terminated_length": 77.0, |
| "completions/mean_length": 51.58203125, |
| "completions/mean_terminated_length": 51.58203125, |
| "completions/min_length": 32.0, |
| "completions/min_terminated_length": 32.0, |
| "entropy": 0.3111993819475174, |
| "epoch": 0.7729885057471264, |
| "frac_reward_zero_std": 0.1875, |
| "grad_norm": 0.00332874758169055, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "num_tokens": 30564669.0, |
| "reward": 0.8610060214996338, |
| "reward_std": 0.1156771183013916, |
| "rewards/ngram_repetition2/mean": -0.0010844022035598755, |
| "rewards/ngram_repetition2/std": 0.008614128455519676, |
| "rewards/ngram_repetition3/mean": -0.0018316828645765781, |
| "rewards/ngram_repetition3/std": 0.011513025499880314, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.828125, |
| "rewards/symbolic_reward_accuracy/std": 0.3776407241821289, |
| "rewards/symbolic_reward_partial_score/mean": 0.9378255009651184, |
| "rewards/symbolic_reward_partial_score/std": 0.19194307923316956, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1578052043914795, |
| "sampling/importance_sampling_ratio/min": 0.0028162214439362288, |
| "sampling/sampling_logp_difference/max": 5.872359275817871, |
| "sampling/sampling_logp_difference/mean": 0.17754799127578735, |
| "step": 269 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2109375, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.25, |
| "entropy": 0.31480634212493896, |
| "epoch": 0.7758620689655172, |
| "grad_norm": 0.001611092360690236, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 270 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.328125, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3828125, |
| "entropy": 0.3181573897600174, |
| "epoch": 0.7787356321839081, |
| "grad_norm": 0.0019503665389493108, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 271 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.3203125, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3671875, |
| "entropy": 0.32164546847343445, |
| "epoch": 0.7816091954022989, |
| "grad_norm": 0.00171377370133996, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 272 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 82.0, |
| "completions/max_terminated_length": 82.0, |
| "completions/mean_length": 49.7265625, |
| "completions/mean_terminated_length": 49.7265625, |
| "completions/min_length": 33.0, |
| "completions/min_terminated_length": 33.0, |
| "entropy": 0.3150666356086731, |
| "epoch": 0.7844827586206896, |
| "frac_reward_zero_std": 0.3125, |
| "grad_norm": 0.0014554295921698213, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "num_tokens": 31020305.0, |
| "reward": 0.8767499327659607, |
| "reward_std": 0.10860107094049454, |
| "rewards/ngram_repetition2/mean": -0.00034998581395484507, |
| "rewards/ngram_repetition2/std": 0.004037702456116676, |
| "rewards/ngram_repetition3/mean": -0.0004340244340710342, |
| "rewards/ngram_repetition3/std": 0.003480604151263833, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.85546875, |
| "rewards/symbolic_reward_accuracy/std": 0.35197147727012634, |
| "rewards/symbolic_reward_partial_score/mean": 0.9264322519302368, |
| "rewards/symbolic_reward_partial_score/std": 0.21918009221553802, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.15395188331604, |
| "sampling/importance_sampling_ratio/min": 0.003653094405308366, |
| "sampling/sampling_logp_difference/max": 5.612180709838867, |
| "sampling/sampling_logp_difference/mean": 0.17280669510364532, |
| "step": 273 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1484375, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1796875, |
| "entropy": 0.3282313942909241, |
| "epoch": 0.7873563218390804, |
| "grad_norm": 0.0014400951331481338, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 274 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2265625, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.265625, |
| "entropy": 0.3243577927350998, |
| "epoch": 0.7902298850574713, |
| "grad_norm": 0.0014871220337226987, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 275 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2578125, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3203125, |
| "entropy": 0.319719135761261, |
| "epoch": 0.7931034482758621, |
| "grad_norm": 0.0018327397992834449, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "step": 276 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 78.0, |
| "completions/max_terminated_length": 78.0, |
| "completions/mean_length": 50.357421875, |
| "completions/mean_terminated_length": 50.357421875, |
| "completions/min_length": 37.0, |
| "completions/min_terminated_length": 37.0, |
| "entropy": 0.3244573473930359, |
| "epoch": 0.7959770114942529, |
| "frac_reward_zero_std": 0.40625, |
| "grad_norm": 0.002220498863607645, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "num_tokens": 31451880.0, |
| "reward": 0.892960786819458, |
| "reward_std": 0.08757635951042175, |
| "rewards/ngram_repetition2/mean": -0.00032840867061167955, |
| "rewards/ngram_repetition2/std": 0.0031765031162649393, |
| "rewards/ngram_repetition3/mean": -0.00047303378232754767, |
| "rewards/ngram_repetition3/std": 0.00411380548030138, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.87109375, |
| "rewards/symbolic_reward_accuracy/std": 0.33542385697364807, |
| "rewards/symbolic_reward_partial_score/mean": 0.9440103769302368, |
| "rewards/symbolic_reward_partial_score/std": 0.1815319061279297, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.15175461769104, |
| "sampling/importance_sampling_ratio/min": 0.008527955040335655, |
| "sampling/sampling_logp_difference/max": 4.764405727386475, |
| "sampling/sampling_logp_difference/mean": 0.17153030633926392, |
| "step": 277 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1875, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.21875, |
| "entropy": 0.31425492465496063, |
| "epoch": 0.7988505747126436, |
| "grad_norm": 0.0010231384076178074, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 278 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1953125, |
| "clip_ratio/low_mean": 0.015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2109375, |
| "entropy": 0.3150269687175751, |
| "epoch": 0.8017241379310345, |
| "grad_norm": 0.0016076903557404876, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 279 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.21875, |
| "entropy": 0.31596778333187103, |
| "epoch": 0.8045977011494253, |
| "grad_norm": 0.0005898877861909568, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 280 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 74.0, |
| "completions/max_terminated_length": 74.0, |
| "completions/mean_length": 49.728515625, |
| "completions/mean_terminated_length": 49.728515625, |
| "completions/min_length": 38.0, |
| "completions/min_terminated_length": 38.0, |
| "entropy": 0.3190753608942032, |
| "epoch": 0.8074712643678161, |
| "frac_reward_zero_std": 0.375, |
| "grad_norm": 0.0013339928118512034, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "num_tokens": 31901181.0, |
| "reward": 0.7412005066871643, |
| "reward_std": 0.08384595811367035, |
| "rewards/ngram_repetition2/mean": -0.0005108925397507846, |
| "rewards/ngram_repetition2/std": 0.004918646067380905, |
| "rewards/ngram_repetition3/mean": -0.0005344899836927652, |
| "rewards/ngram_repetition3/std": 0.004946576897054911, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.68359375, |
| "rewards/symbolic_reward_accuracy/std": 0.46552830934524536, |
| "rewards/symbolic_reward_partial_score/mean": 0.8756510019302368, |
| "rewards/symbolic_reward_partial_score/std": 0.22855894267559052, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1574604511260986, |
| "sampling/importance_sampling_ratio/min": 0.007844127714633942, |
| "sampling/sampling_logp_difference/max": 4.847990036010742, |
| "sampling/sampling_logp_difference/mean": 0.17656563222408295, |
| "step": 281 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1171875, |
| "clip_ratio/low_mean": 0.0234375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.140625, |
| "entropy": 0.3263961225748062, |
| "epoch": 0.8103448275862069, |
| "grad_norm": 0.001344437012448907, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 282 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.109375, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.171875, |
| "entropy": 0.31043170392513275, |
| "epoch": 0.8132183908045977, |
| "grad_norm": 0.0022367271594703197, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "step": 283 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1953125, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.234375, |
| "entropy": 0.30807800590991974, |
| "epoch": 0.8160919540229885, |
| "grad_norm": 0.0007553516770713031, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 284 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 76.0, |
| "completions/max_terminated_length": 76.0, |
| "completions/mean_length": 49.177734375, |
| "completions/mean_terminated_length": 49.177734375, |
| "completions/min_length": 33.0, |
| "completions/min_terminated_length": 33.0, |
| "entropy": 0.3095483332872391, |
| "epoch": 0.8189655172413793, |
| "frac_reward_zero_std": 0.1875, |
| "grad_norm": 0.002113133668899536, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "num_tokens": 32350680.0, |
| "reward": 0.8011506795883179, |
| "reward_std": 0.09198400378227234, |
| "rewards/ngram_repetition2/mean": -0.0008190472144633532, |
| "rewards/ngram_repetition2/std": 0.005462025757879019, |
| "rewards/ngram_repetition3/mean": -0.0013033249415457249, |
| "rewards/ngram_repetition3/std": 0.007873849011957645, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.751953125, |
| "rewards/symbolic_reward_accuracy/std": 0.4323015511035919, |
| "rewards/symbolic_reward_partial_score/mean": 0.916015625, |
| "rewards/symbolic_reward_partial_score/std": 0.17717863619327545, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.157573938369751, |
| "sampling/importance_sampling_ratio/min": 0.0037520925980061293, |
| "sampling/sampling_logp_difference/max": 5.585441589355469, |
| "sampling/sampling_logp_difference/mean": 0.17748284339904785, |
| "step": 285 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2421875, |
| "entropy": 0.3058888614177704, |
| "epoch": 0.8218390804597702, |
| "grad_norm": 0.0015753849875181913, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 286 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2421875, |
| "clip_ratio/low_mean": 0.0859375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.328125, |
| "entropy": 0.318478599190712, |
| "epoch": 0.8247126436781609, |
| "grad_norm": 0.0023167389445006847, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 287 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.3046875, |
| "clip_ratio/low_mean": 0.09375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.3984375, |
| "entropy": 0.30891837179660797, |
| "epoch": 0.8275862068965517, |
| "grad_norm": 0.0011400324292480946, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 288 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 73.0, |
| "completions/max_terminated_length": 73.0, |
| "completions/mean_length": 47.052734375, |
| "completions/mean_terminated_length": 47.052734375, |
| "completions/min_length": 33.0, |
| "completions/min_terminated_length": 33.0, |
| "entropy": 0.31216710805892944, |
| "epoch": 0.8304597701149425, |
| "frac_reward_zero_std": 0.3125, |
| "grad_norm": 0.0013744381722062826, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "num_tokens": 32771219.0, |
| "reward": 0.8360303640365601, |
| "reward_std": 0.1082114726305008, |
| "rewards/ngram_repetition2/mean": -0.00015832216013222933, |
| "rewards/ngram_repetition2/std": 0.0017230219673365355, |
| "rewards/ngram_repetition3/mean": -0.00032279096194542944, |
| "rewards/ngram_repetition3/std": 0.003777548670768738, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.796875, |
| "rewards/symbolic_reward_accuracy/std": 0.4027182459831238, |
| "rewards/symbolic_reward_partial_score/mean": 0.9274088740348816, |
| "rewards/symbolic_reward_partial_score/std": 0.19053959846496582, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1560266017913818, |
| "sampling/importance_sampling_ratio/min": 0.0048217857256531715, |
| "sampling/sampling_logp_difference/max": 5.334610939025879, |
| "sampling/sampling_logp_difference/mean": 0.1788705289363861, |
| "step": 289 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1484375, |
| "clip_ratio/low_mean": 0.015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1640625, |
| "entropy": 0.29697035253047943, |
| "epoch": 0.8333333333333334, |
| "grad_norm": 0.0019398077856749296, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 290 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.171875, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.234375, |
| "entropy": 0.28635063767433167, |
| "epoch": 0.8362068965517241, |
| "grad_norm": 0.0011315299198031425, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 291 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.203125, |
| "clip_ratio/low_mean": 0.0234375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2265625, |
| "entropy": 0.2855062931776047, |
| "epoch": 0.8390804597701149, |
| "grad_norm": 0.0018408946925774217, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 292 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 65.0, |
| "completions/max_terminated_length": 65.0, |
| "completions/mean_length": 47.859375, |
| "completions/mean_terminated_length": 47.859375, |
| "completions/min_length": 33.0, |
| "completions/min_terminated_length": 33.0, |
| "entropy": 0.2754402905702591, |
| "epoch": 0.8419540229885057, |
| "frac_reward_zero_std": 0.3125, |
| "grad_norm": 0.002310027601197362, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "num_tokens": 33213227.0, |
| "reward": 0.8567273616790771, |
| "reward_std": 0.1125885546207428, |
| "rewards/ngram_repetition2/mean": -0.0004661846614908427, |
| "rewards/ngram_repetition2/std": 0.005311489105224609, |
| "rewards/ngram_repetition3/mean": -0.0006299333763308823, |
| "rewards/ngram_repetition3/std": 0.006063228473067284, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.818359375, |
| "rewards/symbolic_reward_accuracy/std": 0.38592514395713806, |
| "rewards/symbolic_reward_partial_score/mean": 0.9462890625, |
| "rewards/symbolic_reward_partial_score/std": 0.162509486079216, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1567089557647705, |
| "sampling/importance_sampling_ratio/min": 0.009652514941990376, |
| "sampling/sampling_logp_difference/max": 4.640536785125732, |
| "sampling/sampling_logp_difference/mean": 0.17354559898376465, |
| "step": 293 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.140625, |
| "clip_ratio/low_mean": 0.015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.15625, |
| "entropy": 0.28624600172042847, |
| "epoch": 0.8448275862068966, |
| "grad_norm": 0.0017463957192376256, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 294 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.2109375, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2421875, |
| "entropy": 0.2811366319656372, |
| "epoch": 0.8477011494252874, |
| "grad_norm": 0.001280653988942504, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 295 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.21875, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.26673395931720734, |
| "epoch": 0.8505747126436781, |
| "grad_norm": 0.0017503536073490977, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 296 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 66.0, |
| "completions/max_terminated_length": 66.0, |
| "completions/mean_length": 47.21484375, |
| "completions/mean_terminated_length": 47.21484375, |
| "completions/min_length": 35.0, |
| "completions/min_terminated_length": 35.0, |
| "entropy": 0.24163145571947098, |
| "epoch": 0.853448275862069, |
| "frac_reward_zero_std": 0.40625, |
| "grad_norm": 0.001637010253034532, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "num_tokens": 33664569.0, |
| "reward": 0.9082980751991272, |
| "reward_std": 0.10888613760471344, |
| "rewards/ngram_repetition2/mean": -9.320468234363943e-05, |
| "rewards/ngram_repetition2/std": 0.0017233911203220487, |
| "rewards/ngram_repetition3/mean": -0.00018059475405607373, |
| "rewards/ngram_repetition3/std": 0.002074119634926319, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.890625, |
| "rewards/symbolic_reward_accuracy/std": 0.31241437792778015, |
| "rewards/symbolic_reward_partial_score/mean": 0.9495442509651184, |
| "rewards/symbolic_reward_partial_score/std": 0.19056856632232666, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.144517421722412, |
| "sampling/importance_sampling_ratio/min": 0.0013258950784802437, |
| "sampling/sampling_logp_difference/max": 6.625667572021484, |
| "sampling/sampling_logp_difference/mean": 0.16017059981822968, |
| "step": 297 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1171875, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1796875, |
| "entropy": 0.24898061156272888, |
| "epoch": 0.8563218390804598, |
| "grad_norm": 0.0015583988279104233, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 298 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.109375, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.140625, |
| "entropy": 0.2613293379545212, |
| "epoch": 0.8591954022988506, |
| "grad_norm": 0.0013029163237661123, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 299 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1328125, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.171875, |
| "entropy": 0.25456735491752625, |
| "epoch": 0.8620689655172413, |
| "grad_norm": 0.0016972015146166086, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 300 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 70.0, |
| "completions/max_terminated_length": 70.0, |
| "completions/mean_length": 47.791015625, |
| "completions/mean_terminated_length": 47.791015625, |
| "completions/min_length": 36.0, |
| "completions/min_terminated_length": 36.0, |
| "entropy": 0.26080329716205597, |
| "epoch": 0.8649425287356322, |
| "frac_reward_zero_std": 0.375, |
| "grad_norm": 0.0011522574350237846, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "num_tokens": 34103534.0, |
| "reward": 0.8019988536834717, |
| "reward_std": 0.12343436479568481, |
| "rewards/ngram_repetition2/mean": -0.00018372925114817917, |
| "rewards/ngram_repetition2/std": 0.0026741281617432833, |
| "rewards/ngram_repetition3/mean": -0.0001245810417458415, |
| "rewards/ngram_repetition3/std": 0.001959472196176648, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.74609375, |
| "rewards/symbolic_reward_accuracy/std": 0.43567025661468506, |
| "rewards/symbolic_reward_partial_score/mean": 0.9324544072151184, |
| "rewards/symbolic_reward_partial_score/std": 0.16472379863262177, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1413416862487793, |
| "sampling/importance_sampling_ratio/min": 0.011449605226516724, |
| "sampling/sampling_logp_difference/max": 4.469799995422363, |
| "sampling/sampling_logp_difference/mean": 0.16359643638134003, |
| "step": 301 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.09375, |
| "clip_ratio/low_mean": 0.0234375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1171875, |
| "entropy": 0.27188640832901, |
| "epoch": 0.867816091954023, |
| "grad_norm": 0.0026438417844474316, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "step": 302 |
| }, |
| { |
| "clip_ratio/high_max": 0.5, |
| "clip_ratio/high_mean": 0.1171875, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1484375, |
| "entropy": 0.25686506927013397, |
| "epoch": 0.8706896551724138, |
| "grad_norm": 0.002449192339554429, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 303 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.0234375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.203125, |
| "entropy": 0.26332540810108185, |
| "epoch": 0.8735632183908046, |
| "grad_norm": 0.001110375509597361, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 304 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 60.0, |
| "completions/max_terminated_length": 60.0, |
| "completions/mean_length": 45.3671875, |
| "completions/mean_terminated_length": 45.3671875, |
| "completions/min_length": 33.0, |
| "completions/min_terminated_length": 33.0, |
| "entropy": 0.26433664560317993, |
| "epoch": 0.8764367816091954, |
| "frac_reward_zero_std": 0.40625, |
| "grad_norm": 0.0027283106464892626, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "num_tokens": 34535562.0, |
| "reward": 0.8602952361106873, |
| "reward_std": 0.11447380483150482, |
| "rewards/ngram_repetition2/mean": -0.00026701093884184957, |
| "rewards/ngram_repetition2/std": 0.0053100138902664185, |
| "rewards/ngram_repetition3/mean": -0.00048113608499988914, |
| "rewards/ngram_repetition3/std": 0.00627403799444437, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.82421875, |
| "rewards/symbolic_reward_accuracy/std": 0.3810062110424042, |
| "rewards/symbolic_reward_partial_score/mean": 0.9444986581802368, |
| "rewards/symbolic_reward_partial_score/std": 0.17329798638820648, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1487350463867188, |
| "sampling/importance_sampling_ratio/min": 0.007460631895810366, |
| "sampling/sampling_logp_difference/max": 4.898115158081055, |
| "sampling/sampling_logp_difference/mean": 0.16636380553245544, |
| "step": 305 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.15625, |
| "entropy": 0.25559763610363007, |
| "epoch": 0.8793103448275862, |
| "grad_norm": 0.0008761414792388678, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 306 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1875, |
| "entropy": 0.25650885701179504, |
| "epoch": 0.882183908045977, |
| "grad_norm": 0.0019666922744363546, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "step": 307 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.0078125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1328125, |
| "entropy": 0.25443021953105927, |
| "epoch": 0.8850574712643678, |
| "grad_norm": 0.0010442298371344805, |
| "learning_rate": 1e-05, |
| "loss": -0.0004, |
| "step": 308 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 66.0, |
| "completions/max_terminated_length": 66.0, |
| "completions/mean_length": 46.666015625, |
| "completions/mean_terminated_length": 46.666015625, |
| "completions/min_length": 35.0, |
| "completions/min_terminated_length": 35.0, |
| "entropy": 0.2645321190357208, |
| "epoch": 0.8879310344827587, |
| "frac_reward_zero_std": 0.40625, |
| "grad_norm": 0.001618504524230957, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "num_tokens": 34973791.0, |
| "reward": 0.7723127603530884, |
| "reward_std": 0.09737245738506317, |
| "rewards/ngram_repetition2/mean": -5.219543163548224e-05, |
| "rewards/ngram_repetition2/std": 0.0008356897160410881, |
| "rewards/ngram_repetition3/mean": -0.00011343907681293786, |
| "rewards/ngram_repetition3/std": 0.0018149681854993105, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.71875, |
| "rewards/symbolic_reward_accuracy/std": 0.45004892349243164, |
| "rewards/symbolic_reward_partial_score/mean": 0.8972981572151184, |
| "rewards/symbolic_reward_partial_score/std": 0.21487106382846832, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1448173522949219, |
| "sampling/importance_sampling_ratio/min": 0.008199482224881649, |
| "sampling/sampling_logp_difference/max": 4.803684234619141, |
| "sampling/sampling_logp_difference/mean": 0.16441011428833008, |
| "step": 309 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1953125, |
| "clip_ratio/low_mean": 0.0234375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.21875, |
| "entropy": 0.26343706250190735, |
| "epoch": 0.8908045977011494, |
| "grad_norm": 0.0015610281843692064, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 310 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1328125, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1875, |
| "entropy": 0.2810946851968765, |
| "epoch": 0.8936781609195402, |
| "grad_norm": 0.00144387932959944, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 311 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2265625, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.265625, |
| "entropy": 0.2614471912384033, |
| "epoch": 0.896551724137931, |
| "grad_norm": 0.0014948392054066062, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 312 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 62.0, |
| "completions/max_terminated_length": 62.0, |
| "completions/mean_length": 45.744140625, |
| "completions/mean_terminated_length": 45.744140625, |
| "completions/min_length": 28.0, |
| "completions/min_terminated_length": 28.0, |
| "entropy": 0.2560636028647423, |
| "epoch": 0.8994252873563219, |
| "frac_reward_zero_std": 0.3125, |
| "grad_norm": 0.0025938425678759813, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "num_tokens": 35424380.0, |
| "reward": 0.7669895887374878, |
| "reward_std": 0.12247426062822342, |
| "rewards/ngram_repetition2/mean": -8.339614578289911e-05, |
| "rewards/ngram_repetition2/std": 0.0017096961382776499, |
| "rewards/ngram_repetition3/mean": -0.00017701656906865537, |
| "rewards/ngram_repetition3/std": 0.002198555273935199, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.712890625, |
| "rewards/symbolic_reward_accuracy/std": 0.45285552740097046, |
| "rewards/symbolic_reward_partial_score/mean": 0.8932291269302368, |
| "rewards/symbolic_reward_partial_score/std": 0.21745367348194122, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1427170038223267, |
| "sampling/importance_sampling_ratio/min": 0.006228272803127766, |
| "sampling/sampling_logp_difference/max": 5.078656196594238, |
| "sampling/sampling_logp_difference/mean": 0.1635725051164627, |
| "step": 313 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.109375, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.171875, |
| "entropy": 0.2608746290206909, |
| "epoch": 0.9022988505747126, |
| "grad_norm": 0.0004902381915599108, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 314 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.15625, |
| "clip_ratio/low_mean": 0.09375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.25, |
| "entropy": 0.2573833763599396, |
| "epoch": 0.9051724137931034, |
| "grad_norm": 0.0018413531361147761, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 315 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.078125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.24609864503145218, |
| "epoch": 0.9080459770114943, |
| "grad_norm": 0.0009675182518549263, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 316 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 63.0, |
| "completions/max_terminated_length": 63.0, |
| "completions/mean_length": 45.85546875, |
| "completions/mean_terminated_length": 45.85546875, |
| "completions/min_length": 35.0, |
| "completions/min_terminated_length": 35.0, |
| "entropy": 0.259112149477005, |
| "epoch": 0.9109195402298851, |
| "frac_reward_zero_std": 0.3125, |
| "grad_norm": 0.0020838521886616945, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "num_tokens": 35850002.0, |
| "reward": 0.838620662689209, |
| "reward_std": 0.11595729738473892, |
| "rewards/ngram_repetition2/mean": -0.00015648298722226173, |
| "rewards/ngram_repetition2/std": 0.0020981046836823225, |
| "rewards/ngram_repetition3/mean": -8.584936585975811e-05, |
| "rewards/ngram_repetition3/std": 0.0013526281109079719, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.794921875, |
| "rewards/symbolic_reward_accuracy/std": 0.4041535556316376, |
| "rewards/symbolic_reward_partial_score/mean": 0.9405924081802368, |
| "rewards/symbolic_reward_partial_score/std": 0.17341189086437225, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1437469720840454, |
| "sampling/importance_sampling_ratio/min": 0.0033797782380133867, |
| "sampling/sampling_logp_difference/max": 5.689945220947266, |
| "sampling/sampling_logp_difference/mean": 0.16323210299015045, |
| "step": 317 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.0234375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1484375, |
| "entropy": 0.25699713826179504, |
| "epoch": 0.9137931034482759, |
| "grad_norm": 0.0027148413937538862, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 318 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1328125, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1875, |
| "entropy": 0.2601219713687897, |
| "epoch": 0.9166666666666666, |
| "grad_norm": 0.0011547203175723553, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "step": 319 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.171875, |
| "clip_ratio/low_mean": 0.0234375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1953125, |
| "entropy": 0.2641301900148392, |
| "epoch": 0.9195402298850575, |
| "grad_norm": 0.002280124928802252, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 320 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 70.0, |
| "completions/max_terminated_length": 70.0, |
| "completions/mean_length": 47.744140625, |
| "completions/mean_terminated_length": 47.744140625, |
| "completions/min_length": 32.0, |
| "completions/min_terminated_length": 32.0, |
| "entropy": 0.27469751238822937, |
| "epoch": 0.9224137931034483, |
| "frac_reward_zero_std": 0.25, |
| "grad_norm": 0.001370429527014494, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "num_tokens": 36301935.0, |
| "reward": 0.8414936661720276, |
| "reward_std": 0.08875171840190887, |
| "rewards/ngram_repetition2/mean": -0.000355370226316154, |
| "rewards/ngram_repetition2/std": 0.003095061983913183, |
| "rewards/ngram_repetition3/mean": -0.0006652825977653265, |
| "rewards/ngram_repetition3/std": 0.003885059617459774, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.80859375, |
| "rewards/symbolic_reward_accuracy/std": 0.3937928080558777, |
| "rewards/symbolic_reward_partial_score/mean": 0.9182942509651184, |
| "rewards/symbolic_reward_partial_score/std": 0.21086378395557404, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1547727584838867, |
| "sampling/importance_sampling_ratio/min": 0.002118661068379879, |
| "sampling/sampling_logp_difference/max": 6.156970977783203, |
| "sampling/sampling_logp_difference/mean": 0.17685817182064056, |
| "step": 321 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.140625, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.171875, |
| "entropy": 0.2789808511734009, |
| "epoch": 0.9252873563218391, |
| "grad_norm": 0.0013959211064502597, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 322 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1484375, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1796875, |
| "entropy": 0.2980600446462631, |
| "epoch": 0.9281609195402298, |
| "grad_norm": 0.002104496583342552, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 323 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.125, |
| "clip_ratio/low_mean": 0.0625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1875, |
| "entropy": 0.26825977861881256, |
| "epoch": 0.9310344827586207, |
| "grad_norm": 0.00133817782625556, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "step": 324 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 69.0, |
| "completions/max_terminated_length": 69.0, |
| "completions/mean_length": 48.28125, |
| "completions/mean_terminated_length": 48.28125, |
| "completions/min_length": 28.0, |
| "completions/min_terminated_length": 28.0, |
| "entropy": 0.2749357968568802, |
| "epoch": 0.9339080459770115, |
| "frac_reward_zero_std": 0.125, |
| "grad_norm": 0.0011246444191783667, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "num_tokens": 36785023.0, |
| "reward": 0.7912914156913757, |
| "reward_std": 0.11614967882633209, |
| "rewards/ngram_repetition2/mean": -0.0005333342123776674, |
| "rewards/ngram_repetition2/std": 0.003764254041016102, |
| "rewards/ngram_repetition3/mean": -0.001186647918075323, |
| "rewards/ngram_repetition3/std": 0.00689814705401659, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.734375, |
| "rewards/symbolic_reward_accuracy/std": 0.44209739565849304, |
| "rewards/symbolic_reward_partial_score/mean": 0.9241536855697632, |
| "rewards/symbolic_reward_partial_score/std": 0.17832158505916595, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1506119966506958, |
| "sampling/importance_sampling_ratio/min": 0.005779067520052195, |
| "sampling/sampling_logp_difference/max": 5.153512954711914, |
| "sampling/sampling_logp_difference/mean": 0.17207130789756775, |
| "step": 325 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.203125, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.25, |
| "entropy": 0.2859587073326111, |
| "epoch": 0.9367816091954023, |
| "grad_norm": 0.001755884732119739, |
| "learning_rate": 1e-05, |
| "loss": -0.0002, |
| "step": 326 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2890625, |
| "clip_ratio/low_mean": 0.0390625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.328125, |
| "entropy": 0.2725732624530792, |
| "epoch": 0.9396551724137931, |
| "grad_norm": 0.002358856610953808, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 327 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.234375, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2890625, |
| "entropy": 0.28172941505908966, |
| "epoch": 0.9425287356321839, |
| "grad_norm": 0.0033431202173233032, |
| "learning_rate": 1e-05, |
| "loss": 0.0004, |
| "step": 328 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 66.0, |
| "completions/max_terminated_length": 66.0, |
| "completions/mean_length": 47.630859375, |
| "completions/mean_terminated_length": 47.630859375, |
| "completions/min_length": 28.0, |
| "completions/min_terminated_length": 28.0, |
| "entropy": 0.27418090403079987, |
| "epoch": 0.9454022988505747, |
| "frac_reward_zero_std": 0.3125, |
| "grad_norm": 0.0034468630328774452, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "num_tokens": 37230082.0, |
| "reward": 0.8493610620498657, |
| "reward_std": 0.12724488973617554, |
| "rewards/ngram_repetition2/mean": -0.00019963737577199936, |
| "rewards/ngram_repetition2/std": 0.003859966993331909, |
| "rewards/ngram_repetition3/mean": -0.00022095959866419435, |
| "rewards/ngram_repetition3/std": 0.004134077113121748, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.81640625, |
| "rewards/symbolic_reward_accuracy/std": 0.3875311613082886, |
| "rewards/symbolic_reward_partial_score/mean": 0.92626953125, |
| "rewards/symbolic_reward_partial_score/std": 0.20346400141716003, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.14650559425354, |
| "sampling/importance_sampling_ratio/min": 0.0031521148048341274, |
| "sampling/sampling_logp_difference/max": 5.759681701660156, |
| "sampling/sampling_logp_difference/mean": 0.16905644536018372, |
| "step": 329 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2265625, |
| "entropy": 0.28660906851291656, |
| "epoch": 0.9482758620689655, |
| "grad_norm": 0.0015979736344888806, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 330 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.234375, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.28125, |
| "entropy": 0.28045378625392914, |
| "epoch": 0.9511494252873564, |
| "grad_norm": 0.0019632827024906874, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 331 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.078125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.2773226499557495, |
| "epoch": 0.9540229885057471, |
| "grad_norm": 0.0015298571670427918, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 332 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 60.0, |
| "completions/max_terminated_length": 60.0, |
| "completions/mean_length": 44.287109375, |
| "completions/mean_terminated_length": 44.287109375, |
| "completions/min_length": 28.0, |
| "completions/min_terminated_length": 28.0, |
| "entropy": 0.2780776619911194, |
| "epoch": 0.9568965517241379, |
| "frac_reward_zero_std": 0.40625, |
| "grad_norm": 0.002085136715322733, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "num_tokens": 37667573.0, |
| "reward": 0.89306640625, |
| "reward_std": 0.1382032036781311, |
| "rewards/ngram_repetition2/mean": 0.0, |
| "rewards/ngram_repetition2/std": 0.0, |
| "rewards/ngram_repetition3/mean": 0.0, |
| "rewards/ngram_repetition3/std": 0.0, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.857421875, |
| "rewards/symbolic_reward_accuracy/std": 0.3499840497970581, |
| "rewards/symbolic_reward_partial_score/mean": 0.9762369394302368, |
| "rewards/symbolic_reward_partial_score/std": 0.1040315255522728, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1428526639938354, |
| "sampling/importance_sampling_ratio/min": 0.009135945700109005, |
| "sampling/sampling_logp_difference/max": 4.695538520812988, |
| "sampling/sampling_logp_difference/mean": 0.1694696843624115, |
| "step": 333 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.1796875, |
| "clip_ratio/low_mean": 0.015625, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1953125, |
| "entropy": 0.28392770886421204, |
| "epoch": 0.9597701149425287, |
| "grad_norm": 0.0020160162821412086, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 334 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1640625, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1953125, |
| "entropy": 0.28121981024742126, |
| "epoch": 0.9626436781609196, |
| "grad_norm": 0.0019810826051980257, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 335 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.2109375, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2578125, |
| "entropy": 0.27402184903621674, |
| "epoch": 0.9655172413793104, |
| "grad_norm": 0.0008822702220641077, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 336 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 63.0, |
| "completions/max_terminated_length": 63.0, |
| "completions/mean_length": 45.30078125, |
| "completions/mean_terminated_length": 45.30078125, |
| "completions/min_length": 35.0, |
| "completions/min_terminated_length": 35.0, |
| "entropy": 0.2705724239349365, |
| "epoch": 0.9683908045977011, |
| "frac_reward_zero_std": 0.34375, |
| "grad_norm": 0.0026577531825751066, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "num_tokens": 38093551.0, |
| "reward": 0.815625011920929, |
| "reward_std": 0.09714089334011078, |
| "rewards/ngram_repetition2/mean": 0.0, |
| "rewards/ngram_repetition2/std": 0.0, |
| "rewards/ngram_repetition3/mean": 0.0, |
| "rewards/ngram_repetition3/std": 0.0, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.779296875, |
| "rewards/symbolic_reward_accuracy/std": 0.4151262938976288, |
| "rewards/symbolic_reward_partial_score/mean": 0.900390625, |
| "rewards/symbolic_reward_partial_score/std": 0.23409590125083923, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1370817422866821, |
| "sampling/importance_sampling_ratio/min": 0.003680461086332798, |
| "sampling/sampling_logp_difference/max": 5.604717254638672, |
| "sampling/sampling_logp_difference/mean": 0.16051676869392395, |
| "step": 337 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1484375, |
| "clip_ratio/low_mean": 0.0546875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.203125, |
| "entropy": 0.25970782339572906, |
| "epoch": 0.9712643678160919, |
| "grad_norm": 0.0025209251325577497, |
| "learning_rate": 1e-05, |
| "loss": 0.0002, |
| "step": 338 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1953125, |
| "clip_ratio/low_mean": 0.078125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.2734375, |
| "entropy": 0.25134529173374176, |
| "epoch": 0.9741379310344828, |
| "grad_norm": 0.0011691589606925845, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 339 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.21875, |
| "clip_ratio/low_mean": 0.03125, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.25, |
| "entropy": 0.25292646884918213, |
| "epoch": 0.9770114942528736, |
| "grad_norm": 0.0009160270565189421, |
| "learning_rate": 1e-05, |
| "loss": -0.0005, |
| "step": 340 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 67.0, |
| "completions/max_terminated_length": 67.0, |
| "completions/mean_length": 45.47265625, |
| "completions/mean_terminated_length": 45.47265625, |
| "completions/min_length": 36.0, |
| "completions/min_terminated_length": 36.0, |
| "entropy": 0.24236022680997849, |
| "epoch": 0.9798850574712644, |
| "frac_reward_zero_std": 0.46875, |
| "grad_norm": 0.002754463814198971, |
| "learning_rate": 1e-05, |
| "loss": 0.0003, |
| "num_tokens": 38537985.0, |
| "reward": 0.790576159954071, |
| "reward_std": 0.10548844188451767, |
| "rewards/ngram_repetition2/mean": 0.0, |
| "rewards/ngram_repetition2/std": 0.0, |
| "rewards/ngram_repetition3/mean": 0.0, |
| "rewards/ngram_repetition3/std": 0.0, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.740234375, |
| "rewards/symbolic_reward_accuracy/std": 0.4389347732067108, |
| "rewards/symbolic_reward_partial_score/mean": 0.9080403447151184, |
| "rewards/symbolic_reward_partial_score/std": 0.20210063457489014, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1354217529296875, |
| "sampling/importance_sampling_ratio/min": 0.005959612783044577, |
| "sampling/sampling_logp_difference/max": 5.1227498054504395, |
| "sampling/sampling_logp_difference/mean": 0.16084396839141846, |
| "step": 341 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.109375, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.15625, |
| "entropy": 0.2414681240916252, |
| "epoch": 0.9827586206896551, |
| "grad_norm": 0.0012340678367763758, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 342 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.09375, |
| "clip_ratio/low_mean": 0.0234375, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1171875, |
| "entropy": 0.23134589940309525, |
| "epoch": 0.985632183908046, |
| "grad_norm": 0.0013804087648168206, |
| "learning_rate": 1e-05, |
| "loss": -0.0003, |
| "step": 343 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.09375, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.09375, |
| "entropy": 0.23478808999061584, |
| "epoch": 0.9885057471264368, |
| "grad_norm": 0.0015636914176866412, |
| "learning_rate": 1e-05, |
| "loss": -0.0001, |
| "step": 344 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 66.0, |
| "completions/max_terminated_length": 66.0, |
| "completions/mean_length": 45.44140625, |
| "completions/mean_terminated_length": 45.44140625, |
| "completions/min_length": 36.0, |
| "completions/min_terminated_length": 36.0, |
| "entropy": 0.2531556040048599, |
| "epoch": 0.9913793103448276, |
| "frac_reward_zero_std": 0.46875, |
| "grad_norm": 0.001333514112047851, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "num_tokens": 38975747.0, |
| "reward": 0.8312982320785522, |
| "reward_std": 0.09679631888866425, |
| "rewards/ngram_repetition2/mean": -6.133001443231478e-05, |
| "rewards/ngram_repetition2/std": 0.00123770406935364, |
| "rewards/ngram_repetition3/mean": 0.0, |
| "rewards/ngram_repetition3/std": 0.0, |
| "rewards/sentence_repetition/mean": 0.0, |
| "rewards/sentence_repetition/std": 0.0, |
| "rewards/symbolic_reward_accuracy/mean": 0.787109375, |
| "rewards/symbolic_reward_accuracy/std": 0.409751296043396, |
| "rewards/symbolic_reward_partial_score/mean": 0.9344075322151184, |
| "rewards/symbolic_reward_partial_score/std": 0.172744482755661, |
| "rewards/tag_count_reward/mean": 0.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1468303203582764, |
| "sampling/importance_sampling_ratio/min": 0.009383895434439182, |
| "sampling/sampling_logp_difference/max": 4.668760299682617, |
| "sampling/sampling_logp_difference/mean": 0.16860270500183105, |
| "step": 345 |
| }, |
| { |
| "clip_ratio/high_max": 0.75, |
| "clip_ratio/high_mean": 0.0703125, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1171875, |
| "entropy": 0.2506842166185379, |
| "epoch": 0.9942528735632183, |
| "grad_norm": 0.0016360621666535735, |
| "learning_rate": 1e-05, |
| "loss": 0.0001, |
| "step": 346 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.140625, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1875, |
| "entropy": 0.25267085433006287, |
| "epoch": 0.9971264367816092, |
| "grad_norm": 0.0012759178644046187, |
| "learning_rate": 1e-05, |
| "loss": -0.0, |
| "step": 347 |
| }, |
| { |
| "clip_ratio/high_max": 1.0, |
| "clip_ratio/high_mean": 0.1328125, |
| "clip_ratio/low_mean": 0.046875, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.1796875, |
| "entropy": 0.2591947913169861, |
| "epoch": 1.0, |
| "grad_norm": 0.0011629678774625063, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_clip_ratio/high_max": 0.0, |
| "eval_clip_ratio/high_mean": 0.0, |
| "eval_clip_ratio/low_mean": 0.0, |
| "eval_clip_ratio/low_min": 0.0, |
| "eval_clip_ratio/region_mean": 0.0, |
| "eval_completions/clipped_ratio": 0.0, |
| "eval_completions/max_length": 60.78947368421053, |
| "eval_completions/max_terminated_length": 60.78947368421053, |
| "eval_completions/mean_length": 46.36307565789474, |
| "eval_completions/mean_terminated_length": 46.36307565789474, |
| "eval_completions/min_length": 30.05263157894737, |
| "eval_completions/min_terminated_length": 30.05263157894737, |
| "eval_entropy": 0.254553884267807, |
| "eval_frac_reward_zero_std": 0.3815789473684211, |
| "eval_loss": -4.107596851099515e-06, |
| "eval_num_tokens": 38975747.0, |
| "eval_reward": 0.7022716889255926, |
| "eval_reward_std": 0.12830137265355965, |
| "eval_rewards/ngram_repetition2/mean": -5.1669975571138295e-06, |
| "eval_rewards/ngram_repetition2/std": 5.8457904838417706e-05, |
| "eval_rewards/ngram_repetition3/mean": -6.034150015023586e-06, |
| "eval_rewards/ngram_repetition3/std": 6.826861614459439e-05, |
| "eval_rewards/sentence_repetition/mean": 0.0, |
| "eval_rewards/sentence_repetition/std": 0.0, |
| "eval_rewards/symbolic_reward_accuracy/mean": 0.6480263157894737, |
| "eval_rewards/symbolic_reward_accuracy/std": 0.44470502357733876, |
| "eval_rewards/symbolic_reward_partial_score/mean": 0.8326822895752756, |
| "eval_rewards/symbolic_reward_partial_score/std": 0.25731427104849564, |
| "eval_rewards/tag_count_reward/mean": -0.011513157894736841, |
| "eval_rewards/tag_count_reward/std": 0.05738983381735651, |
| "eval_runtime": 281.7844, |
| "eval_samples_per_second": 0.532, |
| "eval_sampling/importance_sampling_ratio/max": 2.0, |
| "eval_sampling/importance_sampling_ratio/mean": 1.1433405562451011, |
| "eval_sampling/importance_sampling_ratio/min": 0.010514126441674418, |
| "eval_sampling/sampling_logp_difference/max": 15.934183672854775, |
| "eval_sampling/sampling_logp_difference/mean": 0.18365594979963804, |
| "eval_steps_per_second": 0.007, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 348, |
| "total_flos": 0.0, |
| "train_loss": 0.0006693344619362641, |
| "train_runtime": 4432.3723, |
| "train_samples_per_second": 0.63, |
| "train_steps_per_second": 0.079 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 348, |
| "num_input_tokens_seen": 38975747, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|