single-gpu-safe-test_v2 / trainer_state.json

End of training

e8ce2c6 verified 10 months ago

5.22 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.3410059676044331,
	"eval_steps": 500,
	"global_step": 100,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"completion_length": 140.53125,
	"epoch": 0.03410059676044331,
	"grad_norm": 0.267546683549881,
	"kl": 0.0008768478994170437,
	"learning_rate": 4.936026311617316e-07,
	"loss": 0.0,
	"reward": 1.040625,
	"reward_std": 0.13700193651020526,
	"rewards/correctness_reward_func_factual": 0.1140625,
	"rewards/format_reward_func_factual": 0.9265625,
	"step": 10
	},
	{
	"completion_length": 145.9953125,
	"epoch": 0.06820119352088662,
	"grad_norm": 0.050062697380781174,
	"kl": 0.0010662404758477351,
	"learning_rate": 4.630542059139923e-07,
	"loss": 0.0,
	"reward": 0.9890625,
	"reward_std": 0.12595339305698872,
	"rewards/correctness_reward_func_factual": 0.071875,
	"rewards/format_reward_func_factual": 0.9171875,
	"step": 20
	},
	{
	"completion_length": 146.1140625,
	"epoch": 0.10230179028132992,
	"grad_norm": 0.18261057138442993,
	"kl": 0.0010776295835967175,
	"learning_rate": 4.1035205490778496e-07,
	"loss": 0.0,
	"reward": 0.9875,
	"reward_std": 0.1414213538169861,
	"rewards/correctness_reward_func_factual": 0.0671875,
	"rewards/format_reward_func_factual": 0.9203125,
	"step": 30
	},
	{
	"completion_length": 141.715625,
	"epoch": 0.13640238704177324,
	"grad_norm": 0.25042295455932617,
	"kl": 0.0010743443999672309,
	"learning_rate": 3.409762342408719e-07,
	"loss": 0.0,
	"reward": 1.0390625,
	"reward_std": 0.1392116451635957,
	"rewards/correctness_reward_func_factual": 0.103125,
	"rewards/format_reward_func_factual": 0.9359375,
	"step": 40
	},
	{
	"completion_length": 147.79375,
	"epoch": 0.17050298380221654,
	"grad_norm": 0.1481354683637619,
	"kl": 0.0011524090303282719,
	"learning_rate": 2.621405555286121e-07,
	"loss": 0.0,
	"reward": 1.0125,
	"reward_std": 0.12816310189664365,
	"rewards/correctness_reward_func_factual": 0.078125,
	"rewards/format_reward_func_factual": 0.934375,
	"step": 50
	},
	{
	"completion_length": 144.015625,
	"epoch": 0.20460358056265984,
	"grad_norm": 0.06519610434770584,
	"kl": 0.001239983293635305,
	"learning_rate": 1.8204248194091425e-07,
	"loss": 0.0,
	"reward": 0.98125,
	"reward_std": 0.12816310189664365,
	"rewards/correctness_reward_func_factual": 0.05,
	"rewards/format_reward_func_factual": 0.93125,
	"step": 60
	},
	{
	"completion_length": 144.9734375,
	"epoch": 0.23870417732310314,
	"grad_norm": 0.055830299854278564,
	"kl": 0.0012718188016151543,
	"learning_rate": 1.0901074253727336e-07,
	"loss": 0.0,
	"reward": 0.9984375,
	"reward_std": 0.1524698968976736,
	"rewards/correctness_reward_func_factual": 0.0796875,
	"rewards/format_reward_func_factual": 0.91875,
	"step": 70
	},
	{
	"completion_length": 145.4859375,
	"epoch": 0.2728047740835465,
	"grad_norm": 0.4993789792060852,
	"kl": 0.0011785521975980374,
	"learning_rate": 5.0639297359319846e-08,
	"loss": 0.0,
	"reward": 1.003125,
	"reward_std": 0.13258251920342445,
	"rewards/correctness_reward_func_factual": 0.075,
	"rewards/format_reward_func_factual": 0.928125,
	"step": 80
	},
	{
	"completion_length": 145.9609375,
	"epoch": 0.3069053708439898,
	"grad_norm": 0.08759493380784988,
	"kl": 0.0012535244946775491,
	"learning_rate": 1.2997705010932391e-08,
	"loss": 0.0,
	"reward": 0.990625,
	"reward_std": 0.1414213538169861,
	"rewards/correctness_reward_func_factual": 0.078125,
	"rewards/format_reward_func_factual": 0.9125,
	"step": 90
	},
	{
	"completion_length": 146.890625,
	"epoch": 0.3410059676044331,
	"grad_norm": 0.5095328688621521,
	"kl": 0.0012054607454047074,
	"learning_rate": 0.0,
	"loss": 0.0,
	"reward": 1.021875,
	"reward_std": 0.11048543266952038,
	"rewards/correctness_reward_func_factual": 0.0765625,
	"rewards/format_reward_func_factual": 0.9453125,
	"step": 100
	},
	{
	"epoch": 0.3410059676044331,
	"step": 100,
	"total_flos": 0.0,
	"train_loss": 1.139334672188852e-06,
	"train_runtime": 11009.3263,
	"train_samples_per_second": 0.291,
	"train_steps_per_second": 0.009
	}
	],
	"logging_steps": 10,
	"max_steps": 100,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 50,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 0.0,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}