OpenElla3-Llama3.2B-V2 / checkpoint-50 /trainer_state.json
ItsMeDevRoland's picture
Upload folder using huggingface_hub
132841a verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.8,
"eval_steps": 50,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"grad_norm": 3.3149960041046143,
"learning_rate": 5e-06,
"logits/chosen": -0.24067819118499756,
"logits/rejected": -0.4968351423740387,
"logps/chosen": -433.0858459472656,
"logps/rejected": -68.33470153808594,
"loss": 0.1722,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.1653084754943848,
"rewards/margins": 1.9695370197296143,
"rewards/rejected": 0.19577142596244812,
"step": 5
},
{
"epoch": 0.16,
"grad_norm": 1.4687750339508057,
"learning_rate": 1e-05,
"logits/chosen": -0.23465164005756378,
"logits/rejected": -0.5149508118629456,
"logps/chosen": -433.3921813964844,
"logps/rejected": -65.57392883300781,
"loss": 0.1348,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 2.6356241703033447,
"rewards/margins": 2.478445529937744,
"rewards/rejected": 0.15717869997024536,
"step": 10
},
{
"epoch": 0.24,
"grad_norm": 0.4518139958381653,
"learning_rate": 1.5e-05,
"logits/chosen": -0.22109150886535645,
"logits/rejected": -0.5422734022140503,
"logps/chosen": -421.8102111816406,
"logps/rejected": -68.03514099121094,
"loss": 0.0432,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.584801197052002,
"rewards/margins": 3.5185985565185547,
"rewards/rejected": 0.06620248407125473,
"step": 15
},
{
"epoch": 0.32,
"grad_norm": 0.12663038074970245,
"learning_rate": 2e-05,
"logits/chosen": -0.21289744973182678,
"logits/rejected": -0.5218192338943481,
"logps/chosen": -414.16058349609375,
"logps/rejected": -72.08072662353516,
"loss": 0.0138,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.662674903869629,
"rewards/margins": 4.634940147399902,
"rewards/rejected": 0.02773415483534336,
"step": 20
},
{
"epoch": 0.4,
"grad_norm": 0.043434809893369675,
"learning_rate": 2.5e-05,
"logits/chosen": -0.1724880337715149,
"logits/rejected": -0.632266640663147,
"logps/chosen": -391.53204345703125,
"logps/rejected": -71.50444030761719,
"loss": 0.0039,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.770969390869141,
"rewards/margins": 5.899745464324951,
"rewards/rejected": -0.12877611815929413,
"step": 25
},
{
"epoch": 0.48,
"grad_norm": 0.012639075517654419,
"learning_rate": 3e-05,
"logits/chosen": -0.11160198599100113,
"logits/rejected": -0.46575218439102173,
"logps/chosen": -387.0035095214844,
"logps/rejected": -68.87992095947266,
"loss": 0.0012,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.1022233963012695,
"rewards/margins": 7.20810604095459,
"rewards/rejected": -0.10588403791189194,
"step": 30
},
{
"epoch": 0.56,
"grad_norm": 0.008335842750966549,
"learning_rate": 3.5e-05,
"logits/chosen": -0.16194215416908264,
"logits/rejected": -0.5091412663459778,
"logps/chosen": -380.4056701660156,
"logps/rejected": -70.05772399902344,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.549788475036621,
"rewards/margins": 7.717337608337402,
"rewards/rejected": -0.16754867136478424,
"step": 35
},
{
"epoch": 0.64,
"grad_norm": 0.0034919867757707834,
"learning_rate": 4e-05,
"logits/chosen": -0.12451864778995514,
"logits/rejected": -0.4964370131492615,
"logps/chosen": -379.21673583984375,
"logps/rejected": -69.48336029052734,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": 8.368996620178223,
"rewards/margins": 8.601344108581543,
"rewards/rejected": -0.23234805464744568,
"step": 40
},
{
"epoch": 0.72,
"grad_norm": 0.0023931912146508694,
"learning_rate": 4.5e-05,
"logits/chosen": -0.11491024494171143,
"logits/rejected": -0.5707582831382751,
"logps/chosen": -367.6617431640625,
"logps/rejected": -71.28264617919922,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": 8.458444595336914,
"rewards/margins": 8.877108573913574,
"rewards/rejected": -0.41866397857666016,
"step": 45
},
{
"epoch": 0.8,
"grad_norm": 0.002425891114398837,
"learning_rate": 5e-05,
"logits/chosen": -0.1450928896665573,
"logits/rejected": -0.6191288232803345,
"logps/chosen": -371.6398620605469,
"logps/rejected": -75.66804504394531,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 8.693218231201172,
"rewards/margins": 9.168657302856445,
"rewards/rejected": -0.4754392206668854,
"step": 50
},
{
"epoch": 0.8,
"eval_logits/chosen": -0.09945501387119293,
"eval_logits/rejected": -0.5488065481185913,
"eval_logps/chosen": -366.370849609375,
"eval_logps/rejected": -72.13478088378906,
"eval_loss": 0.0001304554898524657,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 8.9785737991333,
"eval_rewards/margins": 9.362648963928223,
"eval_rewards/rejected": -0.3840752840042114,
"eval_runtime": 31.4719,
"eval_samples_per_second": 3.177,
"eval_steps_per_second": 0.794,
"step": 50
}
],
"logging_steps": 5,
"max_steps": 186,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}