smoke_test_cm_2_gr / trainer_state.json
Gege24's picture
Upload task output 1
3e49ebb verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7e-05,
"eval_steps": 500,
"global_step": 7,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 4548.0,
"completions/max_terminated_length": 4548.0,
"completions/mean_length": 4017.53125,
"completions/mean_terminated_length": 4017.53125,
"completions/min_length": 1087.0,
"completions/min_terminated_length": 1087.0,
"entropy": 0.2045444082468748,
"epoch": 1e-05,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.182149887084961,
"kl": 0.0,
"learning_rate": 0.0,
"loss": 0.0504,
"num_tokens": 152249.0,
"reward": -1.9340598583221436,
"reward_std": 0.786571741104126,
"rewards/rollout_reward_func/mean": -1.9340598583221436,
"rewards/rollout_reward_func/std": 0.8494329452514648,
"sampling/importance_sampling_ratio/max": 1.7909198999404907,
"sampling/importance_sampling_ratio/mean": 0.9390337467193604,
"sampling/importance_sampling_ratio/min": 0.2846600413322449,
"sampling/sampling_logp_difference/max": 0.8398352861404419,
"sampling/sampling_logp_difference/mean": 0.019423075020313263,
"step": 1,
"step_time": 62.780601161008235
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"entropy": 0.2045444082468748,
"epoch": 2e-05,
"grad_norm": 2.1998178958892822,
"kl": 0.0,
"learning_rate": 2.8571428571428575e-07,
"loss": 0.0504,
"step": 2,
"step_time": 13.312115765969793
},
{
"clip_ratio/high_max": 0.010176970972679555,
"clip_ratio/high_mean": 0.005981342634186149,
"clip_ratio/low_mean": 0.0027048319461755455,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.008686174580361694,
"completions/clipped_ratio": 0.0,
"completions/max_length": 4517.0,
"completions/max_terminated_length": 4517.0,
"completions/mean_length": 4186.15625,
"completions/mean_terminated_length": 4186.15625,
"completions/min_length": 1764.0,
"completions/min_terminated_length": 1764.0,
"entropy": 0.17139518074691296,
"epoch": 3e-05,
"frac_reward_zero_std": 0.0,
"grad_norm": 4.413552761077881,
"kl": 0.001802493366994895,
"learning_rate": 5.714285714285715e-07,
"loss": -0.0455,
"num_tokens": 310339.0,
"reward": -2.418001174926758,
"reward_std": 0.5239017009735107,
"rewards/rollout_reward_func/mean": -2.418001174926758,
"rewards/rollout_reward_func/std": 0.5344496965408325,
"sampling/importance_sampling_ratio/max": 2.12880802154541,
"sampling/importance_sampling_ratio/mean": 1.0546352863311768,
"sampling/importance_sampling_ratio/min": 0.30370277166366577,
"sampling/sampling_logp_difference/max": 1.1611073017120361,
"sampling/sampling_logp_difference/mean": 0.019413897767663002,
"step": 3,
"step_time": 61.638283942011185
},
{
"clip_ratio/high_max": 0.010176970972679555,
"clip_ratio/high_mean": 0.005088485486339778,
"clip_ratio/low_mean": 0.0017857142956927419,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0068741998402401805,
"entropy": 0.17303790897130966,
"epoch": 4e-05,
"grad_norm": 4.2835798263549805,
"kl": 0.0016472467759740539,
"learning_rate": 8.571428571428572e-07,
"loss": -0.0444,
"step": 4,
"step_time": 14.028172414000437
},
{
"clip_ratio/high_max": 0.007093983003869653,
"clip_ratio/high_mean": 0.004427273175679147,
"clip_ratio/low_mean": 0.003508900583256036,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.007936173758935183,
"completions/clipped_ratio": 0.0,
"completions/max_length": 4595.0,
"completions/max_terminated_length": 4595.0,
"completions/mean_length": 4118.15625,
"completions/mean_terminated_length": 4118.15625,
"completions/min_length": 2390.0,
"completions/min_terminated_length": 2390.0,
"entropy": 0.17528061009943485,
"epoch": 5e-05,
"frac_reward_zero_std": 0.0,
"grad_norm": 5.063594818115234,
"kl": 0.0027295032050460577,
"learning_rate": 1.142857142857143e-06,
"loss": -0.229,
"num_tokens": 465900.0,
"reward": -1.7591285705566406,
"reward_std": 1.178617238998413,
"rewards/rollout_reward_func/mean": -1.7591285705566406,
"rewards/rollout_reward_func/std": 1.5458139181137085,
"sampling/importance_sampling_ratio/max": 2.2674672603607178,
"sampling/importance_sampling_ratio/mean": 0.9966017007827759,
"sampling/importance_sampling_ratio/min": 0.47512274980545044,
"sampling/sampling_logp_difference/max": 0.6794887781143188,
"sampling/sampling_logp_difference/mean": 0.018155813217163086,
"step": 5,
"step_time": 62.168050993997895
},
{
"clip_ratio/high_max": 0.010666901711374521,
"clip_ratio/high_mean": 0.005333450855687261,
"clip_ratio/low_mean": 0.0043769561452791095,
"clip_ratio/low_min": 0.0017605633474886417,
"clip_ratio/region_mean": 0.009710407059174031,
"entropy": 0.17503651790320873,
"epoch": 6e-05,
"grad_norm": 3.878859281539917,
"kl": 0.0019046790257561952,
"learning_rate": 1.4285714285714286e-06,
"loss": -0.2351,
"step": 6,
"step_time": 13.381077748003008
},
{
"clip_ratio/high_max": 0.017248393152840436,
"clip_ratio/high_mean": 0.00949225208023563,
"clip_ratio/low_mean": 0.006492404674645513,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.015984656638465822,
"completions/clipped_ratio": 0.0,
"completions/max_length": 4658.0,
"completions/max_terminated_length": 4658.0,
"completions/mean_length": 4001.125,
"completions/mean_terminated_length": 4001.125,
"completions/min_length": 402.0,
"completions/min_terminated_length": 402.0,
"entropy": 0.21643172018229961,
"epoch": 7e-05,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.980289936065674,
"kl": 0.001957591186510399,
"learning_rate": 1.7142857142857145e-06,
"loss": 0.1748,
"num_tokens": 618440.0,
"reward": -1.9611331224441528,
"reward_std": 0.7152342796325684,
"rewards/rollout_reward_func/mean": -1.9611331224441528,
"rewards/rollout_reward_func/std": 0.7630742192268372,
"sampling/importance_sampling_ratio/max": 1.5719738006591797,
"sampling/importance_sampling_ratio/mean": 0.9174970388412476,
"sampling/importance_sampling_ratio/min": 0.30265289545059204,
"sampling/sampling_logp_difference/max": 0.5645420551300049,
"sampling/sampling_logp_difference/mean": 0.019142862409353256,
"step": 7,
"step_time": 61.62411752099433
}
],
"logging_steps": 1.0,
"max_steps": 400000,
"num_input_tokens_seen": 618440,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}