dgonier's picture
Upload folder using huggingface_hub
25d6d35 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 30,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5333333333333333,
"grad_norm": 0.7782864570617676,
"learning_rate": 9.966191788709714e-07,
"log_odds_chosen": 0.7443415522575378,
"log_odds_ratio": -0.4570655822753906,
"logits/chosen": 3.981205701828003,
"logits/rejected": 4.507437705993652,
"logps/chosen": -1.3964179754257202,
"logps/rejected": -2.015876054763794,
"loss": 2.3017,
"nll_loss": 2.2559781074523926,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.13964179158210754,
"rewards/margins": 0.06194580718874931,
"rewards/rejected": -0.20158760249614716,
"step": 5
},
{
"epoch": 1.0,
"grad_norm": 1.1227214336395264,
"learning_rate": 8.83022221559489e-07,
"log_odds_chosen": 0.642013669013977,
"log_odds_ratio": -0.48525574803352356,
"logits/chosen": 4.06285285949707,
"logits/rejected": 4.569836139678955,
"logps/chosen": -1.4217931032180786,
"logps/rejected": -1.9608324766159058,
"loss": 2.3875,
"nll_loss": 2.2945656776428223,
"rewards/accuracies": 0.7428571581840515,
"rewards/chosen": -0.14217932522296906,
"rewards/margins": 0.05390391871333122,
"rewards/rejected": -0.19608324766159058,
"step": 10
},
{
"epoch": 1.5333333333333332,
"grad_norm": 0.7834094762802124,
"learning_rate": 6.434016163555451e-07,
"log_odds_chosen": 0.8146808743476868,
"log_odds_ratio": -0.4321725368499756,
"logits/chosen": 4.006339073181152,
"logits/rejected": 4.592737197875977,
"logps/chosen": -1.3805153369903564,
"logps/rejected": -2.061169147491455,
"loss": 2.3073,
"nll_loss": 2.2640388011932373,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.13805153965950012,
"rewards/margins": 0.06806538999080658,
"rewards/rejected": -0.2061169445514679,
"step": 15
},
{
"epoch": 2.0,
"grad_norm": 0.8477768301963806,
"learning_rate": 3.56598383644455e-07,
"log_odds_chosen": 0.5591691136360168,
"log_odds_ratio": -0.5144891738891602,
"logits/chosen": 4.0338454246521,
"logits/rejected": 4.464445114135742,
"logps/chosen": -1.436565637588501,
"logps/rejected": -1.9033842086791992,
"loss": 2.3271,
"nll_loss": 2.283417224884033,
"rewards/accuracies": 0.7428571581840515,
"rewards/chosen": -0.14365655183792114,
"rewards/margins": 0.046681854873895645,
"rewards/rejected": -0.19033841788768768,
"step": 20
},
{
"epoch": 2.533333333333333,
"grad_norm": 0.7918501496315002,
"learning_rate": 1.1697777844051104e-07,
"log_odds_chosen": 0.7850446105003357,
"log_odds_ratio": -0.4418957233428955,
"logits/chosen": 3.9244980812072754,
"logits/rejected": 4.572809219360352,
"logps/chosen": -1.3659677505493164,
"logps/rejected": -2.012371301651001,
"loss": 2.2538,
"nll_loss": 2.2095985412597656,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.13659678399562836,
"rewards/margins": 0.0646403506398201,
"rewards/rejected": -0.20123712718486786,
"step": 25
},
{
"epoch": 3.0,
"grad_norm": 1.1368457078933716,
"learning_rate": 3.380821129028488e-09,
"log_odds_chosen": 0.5917825102806091,
"log_odds_ratio": -0.5039272904396057,
"logits/chosen": 4.121052265167236,
"logits/rejected": 4.495968818664551,
"logps/chosen": -1.455165982246399,
"logps/rejected": -1.9601490497589111,
"loss": 2.3857,
"nll_loss": 2.3466548919677734,
"rewards/accuracies": 0.6857143044471741,
"rewards/chosen": -0.14551660418510437,
"rewards/margins": 0.05049830302596092,
"rewards/rejected": -0.1960149109363556,
"step": 30
}
],
"logging_steps": 5,
"max_steps": 30,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}