parthsarin's picture
Upload folder using huggingface_hub
1fd6668 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.7246376811594203,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 1.1672008395195008,
"epoch": 0.07246376811594203,
"grad_norm": 0.5196013675880937,
"learning_rate": 1.3846153846153847e-05,
"loss": 0.1163,
"mean_token_accuracy": 0.9755751550197601,
"num_tokens": 684207.0,
"step": 10
},
{
"entropy": 1.1451235294342041,
"epoch": 0.14492753623188406,
"grad_norm": 0.12279766914075808,
"learning_rate": 1.9988954038232098e-05,
"loss": 0.1032,
"mean_token_accuracy": 0.9854924380779266,
"num_tokens": 1384915.0,
"step": 20
},
{
"entropy": 1.1682737946510315,
"epoch": 0.21739130434782608,
"grad_norm": 0.11855212926213639,
"learning_rate": 1.9921539290043846e-05,
"loss": 0.0471,
"mean_token_accuracy": 0.9901047766208648,
"num_tokens": 2148413.0,
"step": 30
},
{
"entropy": 1.2033948302268982,
"epoch": 0.2898550724637681,
"grad_norm": 0.21467341414107938,
"learning_rate": 1.979325950184511e-05,
"loss": 0.0477,
"mean_token_accuracy": 0.9882820785045624,
"num_tokens": 2814343.0,
"step": 40
},
{
"entropy": 1.1613067984580994,
"epoch": 0.36231884057971014,
"grad_norm": 0.10971133409734304,
"learning_rate": 1.9604901623544868e-05,
"loss": 0.0616,
"mean_token_accuracy": 0.9847880423069,
"num_tokens": 3554118.0,
"step": 50
},
{
"entropy": 1.2155601382255554,
"epoch": 0.43478260869565216,
"grad_norm": 0.25848802070836036,
"learning_rate": 1.9357621162314325e-05,
"loss": 0.046,
"mean_token_accuracy": 0.9875870645046234,
"num_tokens": 4274087.0,
"step": 60
},
{
"entropy": 1.2151318550109864,
"epoch": 0.5072463768115942,
"grad_norm": 0.5463161288354474,
"learning_rate": 1.9052935093970097e-05,
"loss": 0.0482,
"mean_token_accuracy": 0.9867257714271546,
"num_tokens": 4995138.0,
"step": 70
},
{
"entropy": 1.2199562430381774,
"epoch": 0.5797101449275363,
"grad_norm": 0.13160204776259016,
"learning_rate": 1.869271255687855e-05,
"loss": 0.0531,
"mean_token_accuracy": 0.9862560451030731,
"num_tokens": 5752229.0,
"step": 80
},
{
"entropy": 1.227805531024933,
"epoch": 0.6521739130434783,
"grad_norm": 0.13142100307101248,
"learning_rate": 1.827916338547079e-05,
"loss": 0.0382,
"mean_token_accuracy": 0.9892027378082275,
"num_tokens": 6451519.0,
"step": 90
},
{
"entropy": 1.2010000348091125,
"epoch": 0.7246376811594203,
"grad_norm": 0.1876410091154084,
"learning_rate": 1.7814824553711062e-05,
"loss": 0.0524,
"mean_token_accuracy": 0.9872801005840302,
"num_tokens": 7179797.0,
"step": 100
}
],
"logging_steps": 10,
"max_steps": 414,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1109742516699136.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}