| { |
| "best_global_step": 60, |
| "best_metric": 0.000557390449102968, |
| "best_model_checkpoint": "data/DeepSeek-R1-Distill-Qwen-7B-Staged-2/checkpoint-60", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 120, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 431.0, |
| "completions/max_terminated_length": 431.0, |
| "completions/mean_length": 253.26416015625, |
| "completions/mean_terminated_length": 253.26416015625, |
| "completions/min_length": 137.0, |
| "completions/min_terminated_length": 137.0, |
| "entropy": 0.38996245712041855, |
| "epoch": 0.016666666666666666, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.04174759238958359, |
| "learning_rate": 1e-05, |
| "loss": 0.0019, |
| "num_tokens": 2067997.0, |
| "reward": 3.603942394256592, |
| "reward_std": 0.22614431381225586, |
| "rewards/ngram_repetition2/mean": 0.9632381200790405, |
| "rewards/ngram_repetition2/std": 0.020422853529453278, |
| "rewards/ngram_repetition3/mean": 0.9955652356147766, |
| "rewards/ngram_repetition3/std": 0.006543538998812437, |
| "rewards/symbolic_reward_accuracy/mean": 0.81787109375, |
| "rewards/symbolic_reward_accuracy/std": 0.386044979095459, |
| "rewards/symbolic_reward_partial_score/mean": 0.9390869140625, |
| "rewards/symbolic_reward_partial_score/std": 0.1457148790359497, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9525362849235535, |
| "rewards/thinking_answer_ratio_reward/std": 0.009070757776498795, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.095194935798645, |
| "sampling/importance_sampling_ratio/min": 0.0016489994013682008, |
| "sampling/sampling_logp_difference/max": 6.407586574554443, |
| "sampling/sampling_logp_difference/mean": 0.16560198366641998, |
| "step": 1 |
| }, |
| { |
| "clip_ratio/high_max": 0.16666666666666666, |
| "clip_ratio/high_mean": 0.07291666666666667, |
| "clip_ratio/low_mean": 0.3567708333333333, |
| "clip_ratio/low_min": 0.17708333333333334, |
| "clip_ratio/region_mean": 0.4296875, |
| "entropy": 0.3816731671492259, |
| "epoch": 0.06666666666666667, |
| "grad_norm": 0.03443225473165512, |
| "learning_rate": 1e-05, |
| "loss": 0.0008, |
| "step": 4 |
| }, |
| { |
| "clip_ratio/high_max": 0.14453125, |
| "clip_ratio/high_mean": 0.06396484375, |
| "clip_ratio/low_mean": 0.28955078125, |
| "clip_ratio/low_min": 0.15234375, |
| "clip_ratio/region_mean": 0.353515625, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 624.0, |
| "completions/max_terminated_length": 624.0, |
| "completions/mean_length": 248.92529296875, |
| "completions/mean_terminated_length": 248.92529296875, |
| "completions/min_length": 157.0, |
| "completions/min_terminated_length": 157.0, |
| "entropy": 0.39708597399294376, |
| "epoch": 0.13333333333333333, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.0413547120988369, |
| "learning_rate": 1e-05, |
| "loss": 0.0014, |
| "num_tokens": 4147140.0, |
| "reward": 3.5625743865966797, |
| "reward_std": 0.3117659091949463, |
| "rewards/ngram_repetition2/mean": 0.965080976486206, |
| "rewards/ngram_repetition2/std": 0.021233825013041496, |
| "rewards/ngram_repetition3/mean": 0.9958759546279907, |
| "rewards/ngram_repetition3/std": 0.006623170338571072, |
| "rewards/symbolic_reward_accuracy/mean": 0.8046875, |
| "rewards/symbolic_reward_accuracy/std": 0.3965378999710083, |
| "rewards/symbolic_reward_partial_score/mean": 0.924072265625, |
| "rewards/symbolic_reward_partial_score/std": 0.1713024228811264, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9517685174942017, |
| "rewards/thinking_answer_ratio_reward/std": 0.00964184757322073, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.0969865322113037, |
| "sampling/importance_sampling_ratio/min": 0.005477577913552523, |
| "sampling/sampling_logp_difference/max": 5.20709228515625, |
| "sampling/sampling_logp_difference/mean": 0.16714993119239807, |
| "step": 8 |
| }, |
| { |
| "clip_ratio/high_max": 0.13671875, |
| "clip_ratio/high_mean": 0.06396484375, |
| "clip_ratio/low_mean": 0.2880859375, |
| "clip_ratio/low_min": 0.15234375, |
| "clip_ratio/region_mean": 0.35205078125, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 449.0, |
| "completions/max_terminated_length": 449.0, |
| "completions/mean_length": 266.74072265625, |
| "completions/mean_terminated_length": 266.74072265625, |
| "completions/min_length": 165.0, |
| "completions/min_terminated_length": 165.0, |
| "entropy": 0.41098711267113686, |
| "epoch": 0.2, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.04287375509738922, |
| "learning_rate": 1e-05, |
| "loss": 0.0015, |
| "num_tokens": 6299505.0, |
| "reward": 3.46004581451416, |
| "reward_std": 0.3250929117202759, |
| "rewards/ngram_repetition2/mean": 0.9622728824615479, |
| "rewards/ngram_repetition2/std": 0.02167431451380253, |
| "rewards/ngram_repetition3/mean": 0.9954921007156372, |
| "rewards/ngram_repetition3/std": 0.007107834331691265, |
| "rewards/symbolic_reward_accuracy/mean": 0.75927734375, |
| "rewards/symbolic_reward_accuracy/std": 0.4276266396045685, |
| "rewards/symbolic_reward_partial_score/mean": 0.912353515625, |
| "rewards/symbolic_reward_partial_score/std": 0.1758050173521042, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9559780359268188, |
| "rewards/thinking_answer_ratio_reward/std": 0.007843377068638802, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1009982824325562, |
| "sampling/importance_sampling_ratio/min": 0.0013655413640663028, |
| "sampling/sampling_logp_difference/max": 6.5962042808532715, |
| "sampling/sampling_logp_difference/mean": 0.17280443012714386, |
| "step": 12 |
| }, |
| { |
| "clip_ratio/high_max": 0.1640625, |
| "clip_ratio/high_mean": 0.0791015625, |
| "clip_ratio/low_mean": 0.2392578125, |
| "clip_ratio/low_min": 0.12109375, |
| "clip_ratio/region_mean": 0.318359375, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 472.0, |
| "completions/max_terminated_length": 472.0, |
| "completions/mean_length": 274.34619140625, |
| "completions/mean_terminated_length": 274.34619140625, |
| "completions/min_length": 167.0, |
| "completions/min_terminated_length": 167.0, |
| "entropy": 0.41036204621195793, |
| "epoch": 0.26666666666666666, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.04127402976155281, |
| "learning_rate": 1e-05, |
| "loss": 0.0014, |
| "num_tokens": 8395638.0, |
| "reward": 3.7558231353759766, |
| "reward_std": 0.32028520107269287, |
| "rewards/ngram_repetition2/mean": 0.9618723392486572, |
| "rewards/ngram_repetition2/std": 0.022064488381147385, |
| "rewards/ngram_repetition3/mean": 0.9952791929244995, |
| "rewards/ngram_repetition3/std": 0.006986668799072504, |
| "rewards/symbolic_reward_accuracy/mean": 0.88330078125, |
| "rewards/symbolic_reward_accuracy/std": 0.32113996148109436, |
| "rewards/symbolic_reward_partial_score/mean": 0.9600830078125, |
| "rewards/symbolic_reward_partial_score/std": 0.12365403771400452, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9566828608512878, |
| "rewards/thinking_answer_ratio_reward/std": 0.008525538258254528, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1039669513702393, |
| "sampling/importance_sampling_ratio/min": 8.078159589786083e-05, |
| "sampling/sampling_logp_difference/max": 9.423761367797852, |
| "sampling/sampling_logp_difference/mean": 0.17331793904304504, |
| "step": 16 |
| }, |
| { |
| "clip_ratio/high_max": 0.1640625, |
| "clip_ratio/high_mean": 0.0947265625, |
| "clip_ratio/low_mean": 0.23095703125, |
| "clip_ratio/low_min": 0.12109375, |
| "clip_ratio/region_mean": 0.32568359375, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 546.0, |
| "completions/max_terminated_length": 546.0, |
| "completions/mean_length": 280.68359375, |
| "completions/mean_terminated_length": 280.68359375, |
| "completions/min_length": 147.0, |
| "completions/min_terminated_length": 147.0, |
| "entropy": 0.411681417375803, |
| "epoch": 0.3333333333333333, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.0389348641037941, |
| "learning_rate": 1e-05, |
| "loss": 0.0023, |
| "num_tokens": 10517102.0, |
| "reward": 3.778409242630005, |
| "reward_std": 0.297868549823761, |
| "rewards/ngram_repetition2/mean": 0.9617278575897217, |
| "rewards/ngram_repetition2/std": 0.022929087281227112, |
| "rewards/ngram_repetition3/mean": 0.9948153495788574, |
| "rewards/ngram_repetition3/std": 0.007431842386722565, |
| "rewards/symbolic_reward_accuracy/mean": 0.8955078125, |
| "rewards/symbolic_reward_accuracy/std": 0.3059726655483246, |
| "rewards/symbolic_reward_partial_score/mean": 0.958251953125, |
| "rewards/symbolic_reward_partial_score/std": 0.134090393781662, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9576171636581421, |
| "rewards/thinking_answer_ratio_reward/std": 0.007972889579832554, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1057276725769043, |
| "sampling/importance_sampling_ratio/min": 0.0023463296238332987, |
| "sampling/sampling_logp_difference/max": 6.054903030395508, |
| "sampling/sampling_logp_difference/mean": 0.17559605836868286, |
| "step": 20 |
| }, |
| { |
| "clip_ratio/high_max": 0.1796875, |
| "clip_ratio/high_mean": 0.091796875, |
| "clip_ratio/low_mean": 0.23193359375, |
| "clip_ratio/low_min": 0.10546875, |
| "clip_ratio/region_mean": 0.32373046875, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 475.0, |
| "completions/max_terminated_length": 475.0, |
| "completions/mean_length": 281.13232421875, |
| "completions/mean_terminated_length": 281.13232421875, |
| "completions/min_length": 162.0, |
| "completions/min_terminated_length": 162.0, |
| "entropy": 0.41807421669363976, |
| "epoch": 0.4, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.03686573728919029, |
| "learning_rate": 1e-05, |
| "loss": 0.0022, |
| "num_tokens": 12666877.0, |
| "reward": 3.802722454071045, |
| "reward_std": 0.280770868062973, |
| "rewards/ngram_repetition2/mean": 0.9632822871208191, |
| "rewards/ngram_repetition2/std": 0.02198909968137741, |
| "rewards/ngram_repetition3/mean": 0.995023250579834, |
| "rewards/ngram_repetition3/std": 0.007552496623247862, |
| "rewards/symbolic_reward_accuracy/mean": 0.9052734375, |
| "rewards/symbolic_reward_accuracy/std": 0.2929084002971649, |
| "rewards/symbolic_reward_partial_score/mean": 0.9630126953125, |
| "rewards/symbolic_reward_partial_score/std": 0.12233622372150421, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.957996129989624, |
| "rewards/thinking_answer_ratio_reward/std": 0.007673850283026695, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.109450101852417, |
| "sampling/importance_sampling_ratio/min": 0.0014090462354943156, |
| "sampling/sampling_logp_difference/max": 6.564842224121094, |
| "sampling/sampling_logp_difference/mean": 0.17978055775165558, |
| "step": 24 |
| }, |
| { |
| "clip_ratio/high_max": 0.22265625, |
| "clip_ratio/high_mean": 0.1142578125, |
| "clip_ratio/low_mean": 0.21435546875, |
| "clip_ratio/low_min": 0.08984375, |
| "clip_ratio/region_mean": 0.32861328125, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 786.0, |
| "completions/max_terminated_length": 786.0, |
| "completions/mean_length": 295.8984375, |
| "completions/mean_terminated_length": 295.8984375, |
| "completions/min_length": 169.0, |
| "completions/min_terminated_length": 169.0, |
| "entropy": 0.4283344931900501, |
| "epoch": 0.4666666666666667, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.04093848541378975, |
| "learning_rate": 1e-05, |
| "loss": 0.002, |
| "num_tokens": 14831853.0, |
| "reward": 3.85359525680542, |
| "reward_std": 0.2568969428539276, |
| "rewards/ngram_repetition2/mean": 0.9597057700157166, |
| "rewards/ngram_repetition2/std": 0.024175945669412613, |
| "rewards/ngram_repetition3/mean": 0.9941831827163696, |
| "rewards/ngram_repetition3/std": 0.008336109109222889, |
| "rewards/symbolic_reward_accuracy/mean": 0.92578125, |
| "rewards/symbolic_reward_accuracy/std": 0.2621905505657196, |
| "rewards/symbolic_reward_partial_score/mean": 0.972900390625, |
| "rewards/symbolic_reward_partial_score/std": 0.10685121268033981, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9593456387519836, |
| "rewards/thinking_answer_ratio_reward/std": 0.007558419369161129, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1124420166015625, |
| "sampling/importance_sampling_ratio/min": 0.0036084535531699657, |
| "sampling/sampling_logp_difference/max": 5.624475955963135, |
| "sampling/sampling_logp_difference/mean": 0.1830388605594635, |
| "step": 28 |
| }, |
| { |
| "clip_ratio/high_max": 0.203125, |
| "clip_ratio/high_mean": 0.1083984375, |
| "clip_ratio/low_mean": 0.2470703125, |
| "clip_ratio/low_min": 0.12890625, |
| "clip_ratio/region_mean": 0.35546875, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 547.0, |
| "completions/max_terminated_length": 547.0, |
| "completions/mean_length": 298.046875, |
| "completions/mean_terminated_length": 298.046875, |
| "completions/min_length": 179.0, |
| "completions/min_terminated_length": 179.0, |
| "entropy": 0.431485241279006, |
| "epoch": 0.5333333333333333, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.03429108485579491, |
| "learning_rate": 1e-05, |
| "loss": 0.0025, |
| "num_tokens": 16993549.0, |
| "reward": 3.9130945205688477, |
| "reward_std": 0.15431927144527435, |
| "rewards/ngram_repetition2/mean": 0.9620877504348755, |
| "rewards/ngram_repetition2/std": 0.0209511611610651, |
| "rewards/ngram_repetition3/mean": 0.995103120803833, |
| "rewards/ngram_repetition3/std": 0.0067704287357628345, |
| "rewards/symbolic_reward_accuracy/mean": 0.9521484375, |
| "rewards/symbolic_reward_accuracy/std": 0.21350421011447906, |
| "rewards/symbolic_reward_partial_score/mean": 0.9796142578125, |
| "rewards/symbolic_reward_partial_score/std": 0.10119316726922989, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9611598253250122, |
| "rewards/thinking_answer_ratio_reward/std": 0.006729719694703817, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1149195432662964, |
| "sampling/importance_sampling_ratio/min": 0.001943365903571248, |
| "sampling/sampling_logp_difference/max": 6.24333381652832, |
| "sampling/sampling_logp_difference/mean": 0.18532943725585938, |
| "step": 32 |
| }, |
| { |
| "clip_ratio/high_max": 0.203125, |
| "clip_ratio/high_mean": 0.08837890625, |
| "clip_ratio/low_mean": 0.2294921875, |
| "clip_ratio/low_min": 0.1015625, |
| "clip_ratio/region_mean": 0.31787109375, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 582.0, |
| "completions/max_terminated_length": 582.0, |
| "completions/mean_length": 298.3740234375, |
| "completions/mean_terminated_length": 298.3740234375, |
| "completions/min_length": 192.0, |
| "completions/min_terminated_length": 192.0, |
| "entropy": 0.4310462474822998, |
| "epoch": 0.6, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.03929264098405838, |
| "learning_rate": 1e-05, |
| "loss": 0.0026, |
| "num_tokens": 19169931.0, |
| "reward": 3.8662376403808594, |
| "reward_std": 0.21625936031341553, |
| "rewards/ngram_repetition2/mean": 0.9633911848068237, |
| "rewards/ngram_repetition2/std": 0.020250339061021805, |
| "rewards/ngram_repetition3/mean": 0.99515700340271, |
| "rewards/ngram_repetition3/std": 0.006854338105767965, |
| "rewards/symbolic_reward_accuracy/mean": 0.931640625, |
| "rewards/symbolic_reward_accuracy/std": 0.2524232268333435, |
| "rewards/symbolic_reward_partial_score/mean": 0.9737548828125, |
| "rewards/symbolic_reward_partial_score/std": 0.10834010690450668, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9616156816482544, |
| "rewards/thinking_answer_ratio_reward/std": 0.006748661864548922, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1172317266464233, |
| "sampling/importance_sampling_ratio/min": 0.0015663664089515805, |
| "sampling/sampling_logp_difference/max": 6.458996772766113, |
| "sampling/sampling_logp_difference/mean": 0.18730762600898743, |
| "step": 36 |
| }, |
| { |
| "clip_ratio/high_max": 0.16796875, |
| "clip_ratio/high_mean": 0.09033203125, |
| "clip_ratio/low_mean": 0.25537109375, |
| "clip_ratio/low_min": 0.12890625, |
| "clip_ratio/region_mean": 0.345703125, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 909.0, |
| "completions/max_terminated_length": 909.0, |
| "completions/mean_length": 299.11181640625, |
| "completions/mean_terminated_length": 299.11181640625, |
| "completions/min_length": 193.0, |
| "completions/min_terminated_length": 193.0, |
| "entropy": 0.4281404986977577, |
| "epoch": 0.6666666666666666, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.035706695169210434, |
| "learning_rate": 1e-05, |
| "loss": 0.0028, |
| "num_tokens": 21357168.0, |
| "reward": 3.8661556243896484, |
| "reward_std": 0.10097986459732056, |
| "rewards/ngram_repetition2/mean": 0.9660295248031616, |
| "rewards/ngram_repetition2/std": 0.021356722339987755, |
| "rewards/ngram_repetition3/mean": 0.9956341981887817, |
| "rewards/ngram_repetition3/std": 0.007333936635404825, |
| "rewards/symbolic_reward_accuracy/mean": 0.93017578125, |
| "rewards/symbolic_reward_accuracy/std": 0.254912793636322, |
| "rewards/symbolic_reward_partial_score/mean": 0.9765625, |
| "rewards/symbolic_reward_partial_score/std": 0.09246132522821426, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9624710083007812, |
| "rewards/thinking_answer_ratio_reward/std": 0.005544988438487053, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1155461072921753, |
| "sampling/importance_sampling_ratio/min": 0.0017055901698768139, |
| "sampling/sampling_logp_difference/max": 6.373844146728516, |
| "sampling/sampling_logp_difference/mean": 0.18562009930610657, |
| "step": 40 |
| }, |
| { |
| "clip_ratio/high_max": 0.15625, |
| "clip_ratio/high_mean": 0.07568359375, |
| "clip_ratio/low_mean": 0.26416015625, |
| "clip_ratio/low_min": 0.1328125, |
| "clip_ratio/region_mean": 0.33984375, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 718.0, |
| "completions/max_terminated_length": 718.0, |
| "completions/mean_length": 297.33154296875, |
| "completions/mean_terminated_length": 297.33154296875, |
| "completions/min_length": 196.0, |
| "completions/min_terminated_length": 196.0, |
| "entropy": 0.4222128689289093, |
| "epoch": 0.7333333333333333, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.027303706854581833, |
| "learning_rate": 1e-05, |
| "loss": 0.0018, |
| "num_tokens": 23521431.0, |
| "reward": 3.8894946575164795, |
| "reward_std": 0.110419362783432, |
| "rewards/ngram_repetition2/mean": 0.9679132699966431, |
| "rewards/ngram_repetition2/std": 0.020372966304421425, |
| "rewards/ngram_repetition3/mean": 0.9960880875587463, |
| "rewards/ngram_repetition3/std": 0.0068605802953243256, |
| "rewards/symbolic_reward_accuracy/mean": 0.93994140625, |
| "rewards/symbolic_reward_accuracy/std": 0.23765340447425842, |
| "rewards/symbolic_reward_partial_score/mean": 0.9803466796875, |
| "rewards/symbolic_reward_partial_score/std": 0.08386269956827164, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9625216722488403, |
| "rewards/thinking_answer_ratio_reward/std": 0.004792334046214819, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.114952802658081, |
| "sampling/importance_sampling_ratio/min": 0.002068887697532773, |
| "sampling/sampling_logp_difference/max": 6.180744171142578, |
| "sampling/sampling_logp_difference/mean": 0.18375319242477417, |
| "step": 44 |
| }, |
| { |
| "clip_ratio/high_max": 0.20703125, |
| "clip_ratio/high_mean": 0.099609375, |
| "clip_ratio/low_mean": 0.2666015625, |
| "clip_ratio/low_min": 0.14453125, |
| "clip_ratio/region_mean": 0.3662109375, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 558.0, |
| "completions/max_terminated_length": 558.0, |
| "completions/mean_length": 317.89453125, |
| "completions/mean_terminated_length": 317.89453125, |
| "completions/min_length": 220.0, |
| "completions/min_terminated_length": 220.0, |
| "entropy": 0.4387316107749939, |
| "epoch": 0.8, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.03348841145634651, |
| "learning_rate": 1e-05, |
| "loss": 0.0023, |
| "num_tokens": 25740159.0, |
| "reward": 3.916114091873169, |
| "reward_std": 0.11179815232753754, |
| "rewards/ngram_repetition2/mean": 0.9658024311065674, |
| "rewards/ngram_repetition2/std": 0.016465168446302414, |
| "rewards/ngram_repetition3/mean": 0.9962982535362244, |
| "rewards/ngram_repetition3/std": 0.005400184541940689, |
| "rewards/symbolic_reward_accuracy/mean": 0.951171875, |
| "rewards/symbolic_reward_accuracy/std": 0.21556119620800018, |
| "rewards/symbolic_reward_partial_score/mean": 0.9844970703125, |
| "rewards/symbolic_reward_partial_score/std": 0.07353945821523666, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9652310609817505, |
| "rewards/thinking_answer_ratio_reward/std": 0.00431449618190527, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1190249919891357, |
| "sampling/importance_sampling_ratio/min": 0.001858800882473588, |
| "sampling/sampling_logp_difference/max": 6.287823677062988, |
| "sampling/sampling_logp_difference/mean": 0.18923214077949524, |
| "step": 48 |
| }, |
| { |
| "clip_ratio/high_max": 0.15234375, |
| "clip_ratio/high_mean": 0.0849609375, |
| "clip_ratio/low_mean": 0.26806640625, |
| "clip_ratio/low_min": 0.15234375, |
| "clip_ratio/region_mean": 0.35302734375, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 558.0, |
| "completions/max_terminated_length": 558.0, |
| "completions/mean_length": 317.11865234375, |
| "completions/mean_terminated_length": 317.11865234375, |
| "completions/min_length": 204.0, |
| "completions/min_terminated_length": 204.0, |
| "entropy": 0.4330588784068823, |
| "epoch": 0.8666666666666667, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.031064650043845177, |
| "learning_rate": 1e-05, |
| "loss": 0.0013, |
| "num_tokens": 27939250.0, |
| "reward": 3.918456554412842, |
| "reward_std": 0.07680399715900421, |
| "rewards/ngram_repetition2/mean": 0.9678490161895752, |
| "rewards/ngram_repetition2/std": 0.016260815784335136, |
| "rewards/ngram_repetition3/mean": 0.9966074824333191, |
| "rewards/ngram_repetition3/std": 0.005310139153152704, |
| "rewards/symbolic_reward_accuracy/mean": 0.9521484375, |
| "rewards/symbolic_reward_accuracy/std": 0.21350421011447906, |
| "rewards/symbolic_reward_partial_score/mean": 0.98486328125, |
| "rewards/symbolic_reward_partial_score/std": 0.07214950025081635, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9651836156845093, |
| "rewards/thinking_answer_ratio_reward/std": 0.004080226644873619, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1175155639648438, |
| "sampling/importance_sampling_ratio/min": 0.002885101828724146, |
| "sampling/sampling_logp_difference/max": 5.8481950759887695, |
| "sampling/sampling_logp_difference/mean": 0.18709684908390045, |
| "step": 52 |
| }, |
| { |
| "clip_ratio/high_max": 0.18359375, |
| "clip_ratio/high_mean": 0.09423828125, |
| "clip_ratio/low_mean": 0.2568359375, |
| "clip_ratio/low_min": 0.1171875, |
| "clip_ratio/region_mean": 0.35107421875, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 518.0, |
| "completions/max_terminated_length": 518.0, |
| "completions/mean_length": 330.2392578125, |
| "completions/mean_terminated_length": 330.2392578125, |
| "completions/min_length": 214.0, |
| "completions/min_terminated_length": 214.0, |
| "entropy": 0.4368158672004938, |
| "epoch": 0.9333333333333333, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.0355696976184845, |
| "learning_rate": 1e-05, |
| "loss": 0.0019, |
| "num_tokens": 30189276.0, |
| "reward": 3.906731128692627, |
| "reward_std": 0.1086559072136879, |
| "rewards/ngram_repetition2/mean": 0.9662027359008789, |
| "rewards/ngram_repetition2/std": 0.015879785642027855, |
| "rewards/ngram_repetition3/mean": 0.996091365814209, |
| "rewards/ngram_repetition3/std": 0.005460316780954599, |
| "rewards/symbolic_reward_accuracy/mean": 0.94873046875, |
| "rewards/symbolic_reward_accuracy/std": 0.22060084342956543, |
| "rewards/symbolic_reward_partial_score/mean": 0.97998046875, |
| "rewards/symbolic_reward_partial_score/std": 0.09358829259872437, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9666886925697327, |
| "rewards/thinking_answer_ratio_reward/std": 0.004018484149128199, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1205523014068604, |
| "sampling/importance_sampling_ratio/min": 0.0029683702159672976, |
| "sampling/sampling_logp_difference/max": 5.819742202758789, |
| "sampling/sampling_logp_difference/mean": 0.19034436345100403, |
| "step": 56 |
| }, |
| { |
| "clip_ratio/high_max": 0.16015625, |
| "clip_ratio/high_mean": 0.0791015625, |
| "clip_ratio/low_mean": 0.26416015625, |
| "clip_ratio/low_min": 0.1484375, |
| "clip_ratio/region_mean": 0.34326171875, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 616.0, |
| "completions/max_terminated_length": 616.0, |
| "completions/mean_length": 335.173828125, |
| "completions/mean_terminated_length": 335.173828125, |
| "completions/min_length": 237.0, |
| "completions/min_terminated_length": 237.0, |
| "entropy": 0.4378073513507843, |
| "epoch": 1.0, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.03477643057703972, |
| "learning_rate": 1e-05, |
| "loss": 0.0014, |
| "num_tokens": 32438720.0, |
| "reward": 3.88734769821167, |
| "reward_std": 0.06660252809524536, |
| "rewards/ngram_repetition2/mean": 0.9677587747573853, |
| "rewards/ngram_repetition2/std": 0.017478443682193756, |
| "rewards/ngram_repetition3/mean": 0.9965513944625854, |
| "rewards/ngram_repetition3/std": 0.005568178836256266, |
| "rewards/symbolic_reward_accuracy/mean": 0.93994140625, |
| "rewards/symbolic_reward_accuracy/std": 0.23765340447425842, |
| "rewards/symbolic_reward_partial_score/mean": 0.9781494140625, |
| "rewards/symbolic_reward_partial_score/std": 0.09496273845434189, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9672399759292603, |
| "rewards/thinking_answer_ratio_reward/std": 0.003744090674445033, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1204638481140137, |
| "sampling/importance_sampling_ratio/min": 0.002623903099447489, |
| "sampling/sampling_logp_difference/max": 5.943092346191406, |
| "sampling/sampling_logp_difference/mean": 0.1902998983860016, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_clip_ratio/high_max": 0.0, |
| "eval_clip_ratio/high_mean": 0.0, |
| "eval_clip_ratio/low_mean": 0.0, |
| "eval_clip_ratio/low_min": 0.0, |
| "eval_clip_ratio/region_mean": 0.0, |
| "eval_completions/clipped_ratio": 0.0, |
| "eval_completions/max_length": 436.36842105263156, |
| "eval_completions/max_terminated_length": 436.36842105263156, |
| "eval_completions/mean_length": 328.875, |
| "eval_completions/mean_terminated_length": 328.875, |
| "eval_completions/min_length": 248.89473684210526, |
| "eval_completions/min_terminated_length": 248.89473684210526, |
| "eval_entropy": 0.44058212324192647, |
| "eval_frac_reward_zero_std": 0.0, |
| "eval_loss": 0.000557390449102968, |
| "eval_num_tokens": 32438720.0, |
| "eval_reward": 4.020364962126079, |
| "eval_reward_std": 0.030778637624232368, |
| "eval_rewards/ngram_repetition2/mean": 0.9677612467816001, |
| "eval_rewards/ngram_repetition2/std": 0.014938431252774439, |
| "eval_rewards/ngram_repetition3/mean": 0.9964643026653089, |
| "eval_rewards/ngram_repetition3/std": 0.004579812300538546, |
| "eval_rewards/symbolic_reward_accuracy/mean": 0.9962993421052632, |
| "eval_rewards/symbolic_reward_accuracy/std": 0.03590594467363859, |
| "eval_rewards/symbolic_reward_partial_score/mean": 0.9984580592105263, |
| "eval_rewards/symbolic_reward_partial_score/std": 0.015954513494905672, |
| "eval_rewards/tag_count_reward/mean": 1.0, |
| "eval_rewards/tag_count_reward/std": 0.0, |
| "eval_rewards/thinking_answer_ratio_reward/mean": 0.9665957563801816, |
| "eval_rewards/thinking_answer_ratio_reward/std": 0.003913806052878499, |
| "eval_runtime": 432.901, |
| "eval_samples_per_second": 0.346, |
| "eval_sampling/importance_sampling_ratio/max": 2.0, |
| "eval_sampling/importance_sampling_ratio/mean": 1.1209219003978528, |
| "eval_sampling/importance_sampling_ratio/min": 0.010366919444334743, |
| "eval_sampling/sampling_logp_difference/max": 4.7942703146683545, |
| "eval_sampling/sampling_logp_difference/mean": 0.1907721946113988, |
| "eval_steps_per_second": 0.005, |
| "step": 60 |
| }, |
| { |
| "clip_ratio/high_max": 0.16015625, |
| "clip_ratio/high_mean": 0.0712890625, |
| "clip_ratio/low_mean": 0.2685546875, |
| "clip_ratio/low_min": 0.14453125, |
| "clip_ratio/region_mean": 0.33984375, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 602.0, |
| "completions/max_terminated_length": 602.0, |
| "completions/mean_length": 336.6640625, |
| "completions/mean_terminated_length": 336.6640625, |
| "completions/min_length": 238.0, |
| "completions/min_terminated_length": 238.0, |
| "entropy": 0.43299879133701324, |
| "epoch": 1.0666666666666667, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.03558320552110672, |
| "learning_rate": 1e-05, |
| "loss": 0.0019, |
| "num_tokens": 34676176.0, |
| "reward": 3.9476747512817383, |
| "reward_std": 0.08613574504852295, |
| "rewards/ngram_repetition2/mean": 0.9695593118667603, |
| "rewards/ngram_repetition2/std": 0.01567245088517666, |
| "rewards/ngram_repetition3/mean": 0.9968794584274292, |
| "rewards/ngram_repetition3/std": 0.004831254947930574, |
| "rewards/symbolic_reward_accuracy/mean": 0.96630859375, |
| "rewards/symbolic_reward_accuracy/std": 0.18047769367694855, |
| "rewards/symbolic_reward_partial_score/mean": 0.9857177734375, |
| "rewards/symbolic_reward_partial_score/std": 0.08201702684164047, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9675599336624146, |
| "rewards/thinking_answer_ratio_reward/std": 0.003663764800876379, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1207427978515625, |
| "sampling/importance_sampling_ratio/min": 0.0029126314911991358, |
| "sampling/sampling_logp_difference/max": 5.838698387145996, |
| "sampling/sampling_logp_difference/mean": 0.18947181105613708, |
| "step": 64 |
| }, |
| { |
| "clip_ratio/high_max": 0.1484375, |
| "clip_ratio/high_mean": 0.07958984375, |
| "clip_ratio/low_mean": 0.26123046875, |
| "clip_ratio/low_min": 0.12890625, |
| "clip_ratio/region_mean": 0.3408203125, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 622.0, |
| "completions/max_terminated_length": 622.0, |
| "completions/mean_length": 347.7255859375, |
| "completions/mean_terminated_length": 347.7255859375, |
| "completions/min_length": 230.0, |
| "completions/min_terminated_length": 230.0, |
| "entropy": 0.43613532558083534, |
| "epoch": 1.1333333333333333, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.03729822859168053, |
| "learning_rate": 1e-05, |
| "loss": 0.002, |
| "num_tokens": 36963358.0, |
| "reward": 3.978569269180298, |
| "reward_std": 0.0661315992474556, |
| "rewards/ngram_repetition2/mean": 0.9695651531219482, |
| "rewards/ngram_repetition2/std": 0.015225501731038094, |
| "rewards/ngram_repetition3/mean": 0.9969500303268433, |
| "rewards/ngram_repetition3/std": 0.004695891868323088, |
| "rewards/symbolic_reward_accuracy/mean": 0.978515625, |
| "rewards/symbolic_reward_accuracy/std": 0.14502781629562378, |
| "rewards/symbolic_reward_partial_score/mean": 0.9921875, |
| "rewards/symbolic_reward_partial_score/std": 0.05580603703856468, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9685267210006714, |
| "rewards/thinking_answer_ratio_reward/std": 0.0036575142294168472, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1233885288238525, |
| "sampling/importance_sampling_ratio/min": 0.0019186872523277998, |
| "sampling/sampling_logp_difference/max": 6.2561140060424805, |
| "sampling/sampling_logp_difference/mean": 0.19366593658924103, |
| "step": 68 |
| }, |
| { |
| "clip_ratio/high_max": 0.1796875, |
| "clip_ratio/high_mean": 0.080078125, |
| "clip_ratio/low_mean": 0.2646484375, |
| "clip_ratio/low_min": 0.15234375, |
| "clip_ratio/region_mean": 0.3447265625, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 619.0, |
| "completions/max_terminated_length": 619.0, |
| "completions/mean_length": 360.83251953125, |
| "completions/mean_terminated_length": 360.83251953125, |
| "completions/min_length": 265.0, |
| "completions/min_terminated_length": 265.0, |
| "entropy": 0.44109424389898777, |
| "epoch": 1.2, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.037427984178066254, |
| "learning_rate": 1e-05, |
| "loss": 0.0021, |
| "num_tokens": 39281671.0, |
| "reward": 3.9073996543884277, |
| "reward_std": 0.047650568187236786, |
| "rewards/ngram_repetition2/mean": 0.9681066274642944, |
| "rewards/ngram_repetition2/std": 0.015408935956656933, |
| "rewards/ngram_repetition3/mean": 0.9968547224998474, |
| "rewards/ngram_repetition3/std": 0.004804851021617651, |
| "rewards/symbolic_reward_accuracy/mean": 0.94775390625, |
| "rewards/symbolic_reward_accuracy/std": 0.22257724404335022, |
| "rewards/symbolic_reward_partial_score/mean": 0.9825439453125, |
| "rewards/symbolic_reward_partial_score/std": 0.0814003199338913, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9698253870010376, |
| "rewards/thinking_answer_ratio_reward/std": 0.0032405967358499765, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1255016326904297, |
| "sampling/importance_sampling_ratio/min": 0.001506246393546462, |
| "sampling/sampling_logp_difference/max": 6.498134613037109, |
| "sampling/sampling_logp_difference/mean": 0.19701868295669556, |
| "step": 72 |
| }, |
| { |
| "clip_ratio/high_max": 0.16796875, |
| "clip_ratio/high_mean": 0.0751953125, |
| "clip_ratio/low_mean": 0.2666015625, |
| "clip_ratio/low_min": 0.125, |
| "clip_ratio/region_mean": 0.341796875, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 692.0, |
| "completions/max_terminated_length": 692.0, |
| "completions/mean_length": 365.287109375, |
| "completions/mean_terminated_length": 365.287109375, |
| "completions/min_length": 258.0, |
| "completions/min_terminated_length": 258.0, |
| "entropy": 0.44637384451925755, |
| "epoch": 1.2666666666666666, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.03856123611330986, |
| "learning_rate": 1e-05, |
| "loss": 0.0023, |
| "num_tokens": 41580755.0, |
| "reward": 3.949028491973877, |
| "reward_std": 0.0856785699725151, |
| "rewards/ngram_repetition2/mean": 0.9681618213653564, |
| "rewards/ngram_repetition2/std": 0.01548727136105299, |
| "rewards/ngram_repetition3/mean": 0.9968844652175903, |
| "rewards/ngram_repetition3/std": 0.004666218534111977, |
| "rewards/symbolic_reward_accuracy/mean": 0.96630859375, |
| "rewards/symbolic_reward_accuracy/std": 0.18047769367694855, |
| "rewards/symbolic_reward_partial_score/mean": 0.987060546875, |
| "rewards/symbolic_reward_partial_score/std": 0.07464982569217682, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.970026969909668, |
| "rewards/thinking_answer_ratio_reward/std": 0.003470814088359475, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1290401220321655, |
| "sampling/importance_sampling_ratio/min": 0.0027039332780987024, |
| "sampling/sampling_logp_difference/max": 5.913047790527344, |
| "sampling/sampling_logp_difference/mean": 0.2006792426109314, |
| "step": 76 |
| }, |
| { |
| "clip_ratio/high_max": 0.17578125, |
| "clip_ratio/high_mean": 0.0830078125, |
| "clip_ratio/low_mean": 0.244140625, |
| "clip_ratio/low_min": 0.13671875, |
| "clip_ratio/region_mean": 0.3271484375, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 611.0, |
| "completions/max_terminated_length": 611.0, |
| "completions/mean_length": 380.34423828125, |
| "completions/mean_terminated_length": 380.34423828125, |
| "completions/min_length": 265.0, |
| "completions/min_terminated_length": 265.0, |
| "entropy": 0.47546265460550785, |
| "epoch": 1.3333333333333333, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.03867423161864281, |
| "learning_rate": 1e-05, |
| "loss": 0.003, |
| "num_tokens": 43912980.0, |
| "reward": 3.915666103363037, |
| "reward_std": 0.1401577889919281, |
| "rewards/ngram_repetition2/mean": 0.9641463160514832, |
| "rewards/ngram_repetition2/std": 0.016802899539470673, |
| "rewards/ngram_repetition3/mean": 0.99601149559021, |
| "rewards/ngram_repetition3/std": 0.0052982522174716, |
| "rewards/symbolic_reward_accuracy/mean": 0.95263671875, |
| "rewards/symbolic_reward_accuracy/std": 0.21246656775474548, |
| "rewards/symbolic_reward_partial_score/mean": 0.9810791015625, |
| "rewards/symbolic_reward_partial_score/std": 0.08897262811660767, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9711929559707642, |
| "rewards/thinking_answer_ratio_reward/std": 0.0034950862172991037, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1358217000961304, |
| "sampling/importance_sampling_ratio/min": 0.0015437521506100893, |
| "sampling/sampling_logp_difference/max": 6.473539352416992, |
| "sampling/sampling_logp_difference/mean": 0.2107161283493042, |
| "step": 80 |
| }, |
| { |
| "clip_ratio/high_max": 0.1796875, |
| "clip_ratio/high_mean": 0.07666015625, |
| "clip_ratio/low_mean": 0.2470703125, |
| "clip_ratio/low_min": 0.12109375, |
| "clip_ratio/region_mean": 0.32373046875, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 685.0, |
| "completions/max_terminated_length": 685.0, |
| "completions/mean_length": 390.22705078125, |
| "completions/mean_terminated_length": 390.22705078125, |
| "completions/min_length": 265.0, |
| "completions/min_terminated_length": 265.0, |
| "entropy": 0.47949288971722126, |
| "epoch": 1.4, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.03864947333931923, |
| "learning_rate": 1e-05, |
| "loss": 0.0029, |
| "num_tokens": 46284517.0, |
| "reward": 3.903589963912964, |
| "reward_std": 0.1544736623764038, |
| "rewards/ngram_repetition2/mean": 0.9642166495323181, |
| "rewards/ngram_repetition2/std": 0.01607567071914673, |
| "rewards/ngram_repetition3/mean": 0.9960227608680725, |
| "rewards/ngram_repetition3/std": 0.00498650036752224, |
| "rewards/symbolic_reward_accuracy/mean": 0.94873046875, |
| "rewards/symbolic_reward_accuracy/std": 0.22060084342956543, |
| "rewards/symbolic_reward_partial_score/mean": 0.976806640625, |
| "rewards/symbolic_reward_partial_score/std": 0.10547613352537155, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9719994068145752, |
| "rewards/thinking_answer_ratio_reward/std": 0.0034010012168437243, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.138932466506958, |
| "sampling/importance_sampling_ratio/min": 0.0013370462693274021, |
| "sampling/sampling_logp_difference/max": 6.617292404174805, |
| "sampling/sampling_logp_difference/mean": 0.2153591811656952, |
| "step": 84 |
| }, |
| { |
| "clip_ratio/high_max": 0.1640625, |
| "clip_ratio/high_mean": 0.07421875, |
| "clip_ratio/low_mean": 0.23486328125, |
| "clip_ratio/low_min": 0.1484375, |
| "clip_ratio/region_mean": 0.30908203125, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 615.0, |
| "completions/max_terminated_length": 615.0, |
| "completions/mean_length": 394.150390625, |
| "completions/mean_terminated_length": 394.150390625, |
| "completions/min_length": 271.0, |
| "completions/min_terminated_length": 271.0, |
| "entropy": 0.5021626558154821, |
| "epoch": 1.4666666666666668, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.05966160073876381, |
| "learning_rate": 1e-05, |
| "loss": 0.003, |
| "num_tokens": 48626649.0, |
| "reward": 3.880753517150879, |
| "reward_std": 0.12686511874198914, |
| "rewards/ngram_repetition2/mean": 0.9631878733634949, |
| "rewards/ngram_repetition2/std": 0.015505960211157799, |
| "rewards/ngram_repetition3/mean": 0.995897650718689, |
| "rewards/ngram_repetition3/std": 0.0049866680055856705, |
| "rewards/symbolic_reward_accuracy/mean": 0.93701171875, |
| "rewards/symbolic_reward_accuracy/std": 0.24300122261047363, |
| "rewards/symbolic_reward_partial_score/mean": 0.9774169921875, |
| "rewards/symbolic_reward_partial_score/std": 0.09701967239379883, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9722087383270264, |
| "rewards/thinking_answer_ratio_reward/std": 0.0038304529152810574, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1457538604736328, |
| "sampling/importance_sampling_ratio/min": 0.001241249148733914, |
| "sampling/sampling_logp_difference/max": 6.69163703918457, |
| "sampling/sampling_logp_difference/mean": 0.22615361213684082, |
| "step": 88 |
| }, |
| { |
| "clip_ratio/high_max": 0.08984375, |
| "clip_ratio/high_mean": 0.03173828125, |
| "clip_ratio/low_mean": 0.2744140625, |
| "clip_ratio/low_min": 0.109375, |
| "clip_ratio/region_mean": 0.30615234375, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 608.0, |
| "completions/max_terminated_length": 608.0, |
| "completions/mean_length": 402.93798828125, |
| "completions/mean_terminated_length": 402.93798828125, |
| "completions/min_length": 267.0, |
| "completions/min_terminated_length": 267.0, |
| "entropy": 0.5079601276665926, |
| "epoch": 1.5333333333333332, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.06473611295223236, |
| "learning_rate": 1e-05, |
| "loss": 0.0032, |
| "num_tokens": 51035546.0, |
| "reward": 3.862441062927246, |
| "reward_std": 0.14964377880096436, |
| "rewards/ngram_repetition2/mean": 0.9625787734985352, |
| "rewards/ngram_repetition2/std": 0.015453252010047436, |
| "rewards/ngram_repetition3/mean": 0.9958549737930298, |
| "rewards/ngram_repetition3/std": 0.0049506500363349915, |
| "rewards/symbolic_reward_accuracy/mean": 0.931640625, |
| "rewards/symbolic_reward_accuracy/std": 0.2524232268333435, |
| "rewards/symbolic_reward_partial_score/mean": 0.9698486328125, |
| "rewards/symbolic_reward_partial_score/std": 0.12419875711202621, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9726754426956177, |
| "rewards/thinking_answer_ratio_reward/std": 0.0038639178965240717, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.149343729019165, |
| "sampling/importance_sampling_ratio/min": 0.0010150460293516517, |
| "sampling/sampling_logp_difference/max": 6.892821311950684, |
| "sampling/sampling_logp_difference/mean": 0.23112602531909943, |
| "step": 92 |
| }, |
| { |
| "clip_ratio/high_max": 0.15625, |
| "clip_ratio/high_mean": 0.07275390625, |
| "clip_ratio/low_mean": 0.2626953125, |
| "clip_ratio/low_min": 0.15234375, |
| "clip_ratio/region_mean": 0.33544921875, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 667.0, |
| "completions/max_terminated_length": 667.0, |
| "completions/mean_length": 426.3505859375, |
| "completions/mean_terminated_length": 426.3505859375, |
| "completions/min_length": 287.0, |
| "completions/min_terminated_length": 287.0, |
| "entropy": 0.5987689010798931, |
| "epoch": 1.6, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.0717100203037262, |
| "learning_rate": 1e-05, |
| "loss": 0.0048, |
| "num_tokens": 53520488.0, |
| "reward": 3.874765634536743, |
| "reward_std": 0.17516621947288513, |
| "rewards/ngram_repetition2/mean": 0.9517251253128052, |
| "rewards/ngram_repetition2/std": 0.01955697126686573, |
| "rewards/ngram_repetition3/mean": 0.9931901693344116, |
| "rewards/ngram_repetition3/std": 0.006746932398527861, |
| "rewards/symbolic_reward_accuracy/mean": 0.9345703125, |
| "rewards/symbolic_reward_accuracy/std": 0.24734291434288025, |
| "rewards/symbolic_reward_partial_score/mean": 0.9764404296875, |
| "rewards/symbolic_reward_partial_score/std": 0.09835170954465866, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9735493659973145, |
| "rewards/thinking_answer_ratio_reward/std": 0.004703040700405836, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1692607402801514, |
| "sampling/importance_sampling_ratio/min": 0.0016325593460351229, |
| "sampling/sampling_logp_difference/max": 6.417606353759766, |
| "sampling/sampling_logp_difference/mean": 0.25711047649383545, |
| "step": 96 |
| }, |
| { |
| "clip_ratio/high_max": 0.08984375, |
| "clip_ratio/high_mean": 0.0322265625, |
| "clip_ratio/low_mean": 0.294921875, |
| "clip_ratio/low_min": 0.16015625, |
| "clip_ratio/region_mean": 0.3271484375, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 730.0, |
| "completions/max_terminated_length": 730.0, |
| "completions/mean_length": 446.4931640625, |
| "completions/mean_terminated_length": 446.4931640625, |
| "completions/min_length": 248.0, |
| "completions/min_terminated_length": 248.0, |
| "entropy": 0.6369560994207859, |
| "epoch": 1.6666666666666665, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.08183422684669495, |
| "learning_rate": 1e-05, |
| "loss": 0.0057, |
| "num_tokens": 55986906.0, |
| "reward": 3.8245627880096436, |
| "reward_std": 0.161258727312088, |
| "rewards/ngram_repetition2/mean": 0.9479941129684448, |
| "rewards/ngram_repetition2/std": 0.02070331759750843, |
| "rewards/ngram_repetition3/mean": 0.9925798177719116, |
| "rewards/ngram_repetition3/std": 0.006819311063736677, |
| "rewards/symbolic_reward_accuracy/mean": 0.912109375, |
| "rewards/symbolic_reward_accuracy/std": 0.28320491313934326, |
| "rewards/symbolic_reward_partial_score/mean": 0.97119140625, |
| "rewards/symbolic_reward_partial_score/std": 0.10378827154636383, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9746991991996765, |
| "rewards/thinking_answer_ratio_reward/std": 0.004666423425078392, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.1847954988479614, |
| "sampling/importance_sampling_ratio/min": 0.002613254589959979, |
| "sampling/sampling_logp_difference/max": 5.9471588134765625, |
| "sampling/sampling_logp_difference/mean": 0.27063658833503723, |
| "step": 100 |
| }, |
| { |
| "clip_ratio/high_max": 0.12109375, |
| "clip_ratio/high_mean": 0.052734375, |
| "clip_ratio/low_mean": 0.2841796875, |
| "clip_ratio/low_min": 0.15234375, |
| "clip_ratio/region_mean": 0.3369140625, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 888.0, |
| "completions/max_terminated_length": 888.0, |
| "completions/mean_length": 464.78515625, |
| "completions/mean_terminated_length": 464.78515625, |
| "completions/min_length": 294.0, |
| "completions/min_terminated_length": 294.0, |
| "entropy": 0.6995432414114475, |
| "epoch": 1.7333333333333334, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.09580597281455994, |
| "learning_rate": 1e-05, |
| "loss": 0.0072, |
| "num_tokens": 58507106.0, |
| "reward": 3.8571386337280273, |
| "reward_std": 0.15944623947143555, |
| "rewards/ngram_repetition2/mean": 0.9362776875495911, |
| "rewards/ngram_repetition2/std": 0.02282153069972992, |
| "rewards/ngram_repetition3/mean": 0.98973149061203, |
| "rewards/ngram_repetition3/std": 0.008181481622159481, |
| "rewards/symbolic_reward_accuracy/mean": 0.9267578125, |
| "rewards/symbolic_reward_accuracy/std": 0.26059725880622864, |
| "rewards/symbolic_reward_partial_score/mean": 0.974609375, |
| "rewards/symbolic_reward_partial_score/std": 0.09773869067430496, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9753755927085876, |
| "rewards/thinking_answer_ratio_reward/std": 0.004951382987201214, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2027469873428345, |
| "sampling/importance_sampling_ratio/min": 0.0011123154545202851, |
| "sampling/sampling_logp_difference/max": 6.801311492919922, |
| "sampling/sampling_logp_difference/mean": 0.287597119808197, |
| "step": 104 |
| }, |
| { |
| "clip_ratio/high_max": 0.0859375, |
| "clip_ratio/high_mean": 0.02734375, |
| "clip_ratio/low_mean": 0.28125, |
| "clip_ratio/low_min": 0.1484375, |
| "clip_ratio/region_mean": 0.30859375, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 844.0, |
| "completions/max_terminated_length": 844.0, |
| "completions/mean_length": 476.17138671875, |
| "completions/mean_terminated_length": 476.17138671875, |
| "completions/min_length": 290.0, |
| "completions/min_terminated_length": 290.0, |
| "entropy": 0.7207919657230377, |
| "epoch": 1.8, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.08035453408956528, |
| "learning_rate": 1e-05, |
| "loss": 0.0083, |
| "num_tokens": 61044993.0, |
| "reward": 3.845712184906006, |
| "reward_std": 0.15924084186553955, |
| "rewards/ngram_repetition2/mean": 0.9297587871551514, |
| "rewards/ngram_repetition2/std": 0.025162160396575928, |
| "rewards/ngram_repetition3/mean": 0.9881495237350464, |
| "rewards/ngram_repetition3/std": 0.009133792482316494, |
| "rewards/symbolic_reward_accuracy/mean": 0.92138671875, |
| "rewards/symbolic_reward_accuracy/std": 0.2691999673843384, |
| "rewards/symbolic_reward_partial_score/mean": 0.9739990234375, |
| "rewards/symbolic_reward_partial_score/std": 0.09710752964019775, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9760768413543701, |
| "rewards/thinking_answer_ratio_reward/std": 0.005398593842983246, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2120659351348877, |
| "sampling/importance_sampling_ratio/min": 0.002052898984402418, |
| "sampling/sampling_logp_difference/max": 6.188502311706543, |
| "sampling/sampling_logp_difference/mean": 0.2954035997390747, |
| "step": 108 |
| }, |
| { |
| "clip_ratio/high_max": 0.1484375, |
| "clip_ratio/high_mean": 0.06103515625, |
| "clip_ratio/low_mean": 0.26611328125, |
| "clip_ratio/low_min": 0.13671875, |
| "clip_ratio/region_mean": 0.3271484375, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 844.0, |
| "completions/max_terminated_length": 844.0, |
| "completions/mean_length": 489.7314453125, |
| "completions/mean_terminated_length": 489.7314453125, |
| "completions/min_length": 308.0, |
| "completions/min_terminated_length": 308.0, |
| "entropy": 0.7733415886759758, |
| "epoch": 1.8666666666666667, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.10550207644701004, |
| "learning_rate": 1e-05, |
| "loss": 0.008, |
| "num_tokens": 63600603.0, |
| "reward": 3.808783531188965, |
| "reward_std": 0.21294765174388885, |
| "rewards/ngram_repetition2/mean": 0.9247410297393799, |
| "rewards/ngram_repetition2/std": 0.02524981088936329, |
| "rewards/ngram_repetition3/mean": 0.9873223304748535, |
| "rewards/ngram_repetition3/std": 0.009321006014943123, |
| "rewards/symbolic_reward_accuracy/mean": 0.90478515625, |
| "rewards/symbolic_reward_accuracy/std": 0.2935831546783447, |
| "rewards/symbolic_reward_partial_score/mean": 0.9703369140625, |
| "rewards/symbolic_reward_partial_score/std": 0.10009879618883133, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9755770564079285, |
| "rewards/thinking_answer_ratio_reward/std": 0.007946600206196308, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2247238159179688, |
| "sampling/importance_sampling_ratio/min": 0.0008502666023559868, |
| "sampling/sampling_logp_difference/max": 7.069960594177246, |
| "sampling/sampling_logp_difference/mean": 0.30872130393981934, |
| "step": 112 |
| }, |
| { |
| "clip_ratio/high_max": 0.109375, |
| "clip_ratio/high_mean": 0.03759765625, |
| "clip_ratio/low_mean": 0.27392578125, |
| "clip_ratio/low_min": 0.14453125, |
| "clip_ratio/region_mean": 0.3115234375, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 827.0, |
| "completions/max_terminated_length": 827.0, |
| "completions/mean_length": 477.89501953125, |
| "completions/mean_terminated_length": 477.89501953125, |
| "completions/min_length": 299.0, |
| "completions/min_terminated_length": 299.0, |
| "entropy": 0.7558131814002991, |
| "epoch": 1.9333333333333333, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.11424785852432251, |
| "learning_rate": 1e-05, |
| "loss": 0.0062, |
| "num_tokens": 66125636.0, |
| "reward": 3.8450608253479004, |
| "reward_std": 0.21053476631641388, |
| "rewards/ngram_repetition2/mean": 0.9281049966812134, |
| "rewards/ngram_repetition2/std": 0.02371094562113285, |
| "rewards/ngram_repetition3/mean": 0.987943172454834, |
| "rewards/ngram_repetition3/std": 0.008886902593076229, |
| "rewards/symbolic_reward_accuracy/mean": 0.92138671875, |
| "rewards/symbolic_reward_accuracy/std": 0.2691999673843384, |
| "rewards/symbolic_reward_partial_score/mean": 0.973388671875, |
| "rewards/symbolic_reward_partial_score/std": 0.09772618114948273, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9738227128982544, |
| "rewards/thinking_answer_ratio_reward/std": 0.007113671861588955, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2223323583602905, |
| "sampling/importance_sampling_ratio/min": 0.0015530625823885202, |
| "sampling/sampling_logp_difference/max": 6.467526435852051, |
| "sampling/sampling_logp_difference/mean": 0.3047195076942444, |
| "step": 116 |
| }, |
| { |
| "clip_ratio/high_max": 0.1171875, |
| "clip_ratio/high_mean": 0.04541015625, |
| "clip_ratio/low_mean": 0.30029296875, |
| "clip_ratio/low_min": 0.16796875, |
| "clip_ratio/region_mean": 0.345703125, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 848.0, |
| "completions/max_terminated_length": 848.0, |
| "completions/mean_length": 490.2041015625, |
| "completions/mean_terminated_length": 490.2041015625, |
| "completions/min_length": 284.0, |
| "completions/min_terminated_length": 284.0, |
| "entropy": 0.8120047375559807, |
| "epoch": 2.0, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.1353643834590912, |
| "learning_rate": 1e-05, |
| "loss": 0.0081, |
| "num_tokens": 68691558.0, |
| "reward": 3.777482748031616, |
| "reward_std": 0.2158791422843933, |
| "rewards/ngram_repetition2/mean": 0.9211949110031128, |
| "rewards/ngram_repetition2/std": 0.025232266634702682, |
| "rewards/ngram_repetition3/mean": 0.9863015413284302, |
| "rewards/ngram_repetition3/std": 0.010167215950787067, |
| "rewards/symbolic_reward_accuracy/mean": 0.89208984375, |
| "rewards/symbolic_reward_accuracy/std": 0.3103426992893219, |
| "rewards/symbolic_reward_partial_score/mean": 0.9644775390625, |
| "rewards/symbolic_reward_partial_score/std": 0.11238247156143188, |
| "rewards/tag_count_reward/mean": 1.0, |
| "rewards/tag_count_reward/std": 0.0, |
| "rewards/thinking_answer_ratio_reward/mean": 0.9750710725784302, |
| "rewards/thinking_answer_ratio_reward/std": 0.006316343788057566, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.2388031482696533, |
| "sampling/importance_sampling_ratio/min": 0.0007285280153155327, |
| "sampling/sampling_logp_difference/max": 7.224484443664551, |
| "sampling/sampling_logp_difference/mean": 0.3197594881057739, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_clip_ratio/high_max": 0.0, |
| "eval_clip_ratio/high_mean": 0.0, |
| "eval_clip_ratio/low_mean": 0.0, |
| "eval_clip_ratio/low_min": 0.0, |
| "eval_clip_ratio/region_mean": 0.0, |
| "eval_completions/clipped_ratio": 0.0, |
| "eval_completions/max_length": 724.9473684210526, |
| "eval_completions/max_terminated_length": 724.9473684210526, |
| "eval_completions/mean_length": 479.13075657894734, |
| "eval_completions/mean_terminated_length": 479.13075657894734, |
| "eval_completions/min_length": 320.7368421052632, |
| "eval_completions/min_terminated_length": 320.7368421052632, |
| "eval_entropy": 0.8422083133145383, |
| "eval_frac_reward_zero_std": 0.0, |
| "eval_loss": 0.002333071082830429, |
| "eval_num_tokens": 68691558.0, |
| "eval_reward": 3.9059901237487793, |
| "eval_reward_std": 0.15592951225852103, |
| "eval_rewards/ngram_repetition2/mean": 0.916603662465748, |
| "eval_rewards/ngram_repetition2/std": 0.02568632119188183, |
| "eval_rewards/ngram_repetition3/mean": 0.985195959869184, |
| "eval_rewards/ngram_repetition3/std": 0.00971777170994564, |
| "eval_rewards/symbolic_reward_accuracy/mean": 0.9469572368421053, |
| "eval_rewards/symbolic_reward_accuracy/std": 0.18719423405433955, |
| "eval_rewards/symbolic_reward_partial_score/mean": 0.9833470394736842, |
| "eval_rewards/symbolic_reward_partial_score/std": 0.06975230809889342, |
| "eval_rewards/tag_count_reward/mean": 1.0, |
| "eval_rewards/tag_count_reward/std": 0.0, |
| "eval_rewards/thinking_answer_ratio_reward/mean": 0.9710588988504911, |
| "eval_rewards/thinking_answer_ratio_reward/std": 0.008188823862981639, |
| "eval_runtime": 516.789, |
| "eval_samples_per_second": 0.29, |
| "eval_sampling/importance_sampling_ratio/max": 2.0, |
| "eval_sampling/importance_sampling_ratio/mean": 1.248612272111993, |
| "eval_sampling/importance_sampling_ratio/min": 0.0039012981946335025, |
| "eval_sampling/sampling_logp_difference/max": 5.646010674928364, |
| "eval_sampling/sampling_logp_difference/mean": 0.32886375722132233, |
| "eval_steps_per_second": 0.004, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 120, |
| "total_flos": 0.0, |
| "train_loss": 0.0032316646189428865, |
| "train_runtime": 11600.051, |
| "train_samples_per_second": 0.349, |
| "train_steps_per_second": 0.01 |
| } |
| ], |
| "logging_steps": 4, |
| "max_steps": 120, |
| "num_input_tokens_seen": 68691558, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|