RaspberryVitriol's picture
Upload checkpoint-1000
49f2370 verified
{
"best_global_step": 1000,
"best_metric": 0.8257431354055336,
"best_model_checkpoint": "/kaggle/working/checkpoints_me5_simple/checkpoint-1000",
"epoch": 1.9276759884281582,
"eval_steps": 200,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03857280617164899,
"grad_norm": 2.771850347518921,
"learning_rate": 2.435897435897436e-06,
"loss": 0.6886,
"step": 20
},
{
"epoch": 0.07714561234329798,
"grad_norm": 4.430428504943848,
"learning_rate": 5e-06,
"loss": 0.6607,
"step": 40
},
{
"epoch": 0.11571841851494696,
"grad_norm": 3.110168933868408,
"learning_rate": 7.564102564102564e-06,
"loss": 0.6083,
"step": 60
},
{
"epoch": 0.15429122468659595,
"grad_norm": 4.398952960968018,
"learning_rate": 1.012820512820513e-05,
"loss": 0.5112,
"step": 80
},
{
"epoch": 0.19286403085824494,
"grad_norm": 12.522466659545898,
"learning_rate": 1.2692307692307693e-05,
"loss": 0.512,
"step": 100
},
{
"epoch": 0.23143683702989393,
"grad_norm": 3.5153162479400635,
"learning_rate": 1.5256410256410257e-05,
"loss": 0.4761,
"step": 120
},
{
"epoch": 0.2700096432015429,
"grad_norm": 8.53997802734375,
"learning_rate": 1.7820512820512823e-05,
"loss": 0.5021,
"step": 140
},
{
"epoch": 0.3085824493731919,
"grad_norm": 10.39119815826416,
"learning_rate": 1.9957173447537473e-05,
"loss": 0.479,
"step": 160
},
{
"epoch": 0.3471552555448409,
"grad_norm": 8.681617736816406,
"learning_rate": 1.9671663097787296e-05,
"loss": 0.4733,
"step": 180
},
{
"epoch": 0.3857280617164899,
"grad_norm": 5.133293151855469,
"learning_rate": 1.9386152748037116e-05,
"loss": 0.451,
"step": 200
},
{
"epoch": 0.3857280617164899,
"eval_accuracy": 0.7950875288370199,
"eval_f1": 0.7941359630394611,
"eval_loss": 0.43800750374794006,
"eval_runtime": 106.7777,
"eval_samples_per_second": 69.013,
"eval_steps_per_second": 0.543,
"step": 200
},
{
"epoch": 0.42430086788813887,
"grad_norm": 4.8331804275512695,
"learning_rate": 1.910064239828694e-05,
"loss": 0.4459,
"step": 220
},
{
"epoch": 0.46287367405978785,
"grad_norm": 5.214622974395752,
"learning_rate": 1.8815132048536763e-05,
"loss": 0.458,
"step": 240
},
{
"epoch": 0.5014464802314368,
"grad_norm": 6.8793416023254395,
"learning_rate": 1.8529621698786583e-05,
"loss": 0.4355,
"step": 260
},
{
"epoch": 0.5400192864030858,
"grad_norm": 4.028793811798096,
"learning_rate": 1.8244111349036403e-05,
"loss": 0.4344,
"step": 280
},
{
"epoch": 0.5785920925747348,
"grad_norm": 3.3970532417297363,
"learning_rate": 1.7958600999286226e-05,
"loss": 0.4462,
"step": 300
},
{
"epoch": 0.6171648987463838,
"grad_norm": 4.14546012878418,
"learning_rate": 1.7673090649536046e-05,
"loss": 0.4217,
"step": 320
},
{
"epoch": 0.6557377049180327,
"grad_norm": 5.674211502075195,
"learning_rate": 1.738758029978587e-05,
"loss": 0.4358,
"step": 340
},
{
"epoch": 0.6943105110896818,
"grad_norm": 4.182572841644287,
"learning_rate": 1.7102069950035693e-05,
"loss": 0.4225,
"step": 360
},
{
"epoch": 0.7328833172613307,
"grad_norm": 3.3439126014709473,
"learning_rate": 1.6816559600285513e-05,
"loss": 0.4414,
"step": 380
},
{
"epoch": 0.7714561234329798,
"grad_norm": 7.2115912437438965,
"learning_rate": 1.6531049250535333e-05,
"loss": 0.4103,
"step": 400
},
{
"epoch": 0.7714561234329798,
"eval_accuracy": 0.8002442665219162,
"eval_f1": 0.8001647440873483,
"eval_loss": 0.4134698808193207,
"eval_runtime": 106.1965,
"eval_samples_per_second": 69.39,
"eval_steps_per_second": 0.546,
"step": 400
},
{
"epoch": 0.8100289296046287,
"grad_norm": 5.1122050285339355,
"learning_rate": 1.6245538900785153e-05,
"loss": 0.4194,
"step": 420
},
{
"epoch": 0.8486017357762777,
"grad_norm": 5.695277214050293,
"learning_rate": 1.5960028551034976e-05,
"loss": 0.4271,
"step": 440
},
{
"epoch": 0.8871745419479267,
"grad_norm": 4.1516523361206055,
"learning_rate": 1.56745182012848e-05,
"loss": 0.4282,
"step": 460
},
{
"epoch": 0.9257473481195757,
"grad_norm": 4.449565410614014,
"learning_rate": 1.538900785153462e-05,
"loss": 0.4051,
"step": 480
},
{
"epoch": 0.9643201542912246,
"grad_norm": 6.348763942718506,
"learning_rate": 1.5103497501784441e-05,
"loss": 0.4096,
"step": 500
},
{
"epoch": 1.0019286403085825,
"grad_norm": 4.346441268920898,
"learning_rate": 1.4817987152034263e-05,
"loss": 0.3754,
"step": 520
},
{
"epoch": 1.0405014464802314,
"grad_norm": 3.8286328315734863,
"learning_rate": 1.4532476802284083e-05,
"loss": 0.3659,
"step": 540
},
{
"epoch": 1.0790742526518804,
"grad_norm": 3.8285741806030273,
"learning_rate": 1.4246966452533906e-05,
"loss": 0.3518,
"step": 560
},
{
"epoch": 1.1176470588235294,
"grad_norm": 8.565245628356934,
"learning_rate": 1.3961456102783728e-05,
"loss": 0.3832,
"step": 580
},
{
"epoch": 1.1562198649951785,
"grad_norm": 7.982793807983398,
"learning_rate": 1.3675945753033548e-05,
"loss": 0.3799,
"step": 600
},
{
"epoch": 1.1562198649951785,
"eval_accuracy": 0.8135432216040168,
"eval_f1": 0.8131288556339042,
"eval_loss": 0.40015241503715515,
"eval_runtime": 106.0288,
"eval_samples_per_second": 69.5,
"eval_steps_per_second": 0.547,
"step": 600
},
{
"epoch": 1.1947926711668273,
"grad_norm": 4.712324619293213,
"learning_rate": 1.3390435403283371e-05,
"loss": 0.3668,
"step": 620
},
{
"epoch": 1.2333654773384763,
"grad_norm": 5.1233906745910645,
"learning_rate": 1.3104925053533191e-05,
"loss": 0.3609,
"step": 640
},
{
"epoch": 1.2719382835101254,
"grad_norm": 4.738674640655518,
"learning_rate": 1.2819414703783013e-05,
"loss": 0.3538,
"step": 660
},
{
"epoch": 1.3105110896817744,
"grad_norm": 4.720370769500732,
"learning_rate": 1.2533904354032836e-05,
"loss": 0.3591,
"step": 680
},
{
"epoch": 1.3490838958534233,
"grad_norm": 5.274127006530762,
"learning_rate": 1.2248394004282656e-05,
"loss": 0.3469,
"step": 700
},
{
"epoch": 1.3876567020250723,
"grad_norm": 4.651730537414551,
"learning_rate": 1.1962883654532478e-05,
"loss": 0.3547,
"step": 720
},
{
"epoch": 1.4262295081967213,
"grad_norm": 3.479980945587158,
"learning_rate": 1.1677373304782301e-05,
"loss": 0.3647,
"step": 740
},
{
"epoch": 1.4648023143683702,
"grad_norm": 4.897223949432373,
"learning_rate": 1.1391862955032121e-05,
"loss": 0.3468,
"step": 760
},
{
"epoch": 1.5033751205400194,
"grad_norm": 5.784550666809082,
"learning_rate": 1.1106352605281943e-05,
"loss": 0.3492,
"step": 780
},
{
"epoch": 1.5419479267116682,
"grad_norm": 3.776372194290161,
"learning_rate": 1.0820842255531764e-05,
"loss": 0.3695,
"step": 800
},
{
"epoch": 1.5419479267116682,
"eval_accuracy": 0.8222282534943683,
"eval_f1": 0.8190341805933294,
"eval_loss": 0.40005970001220703,
"eval_runtime": 106.1744,
"eval_samples_per_second": 69.405,
"eval_steps_per_second": 0.546,
"step": 800
},
{
"epoch": 1.5805207328833173,
"grad_norm": 4.813859939575195,
"learning_rate": 1.0535331905781586e-05,
"loss": 0.3568,
"step": 820
},
{
"epoch": 1.6190935390549663,
"grad_norm": 6.341957092285156,
"learning_rate": 1.0249821556031408e-05,
"loss": 0.363,
"step": 840
},
{
"epoch": 1.6576663452266152,
"grad_norm": 4.9156036376953125,
"learning_rate": 9.96431120628123e-06,
"loss": 0.3492,
"step": 860
},
{
"epoch": 1.6962391513982642,
"grad_norm": 3.488044023513794,
"learning_rate": 9.678800856531049e-06,
"loss": 0.3416,
"step": 880
},
{
"epoch": 1.7348119575699132,
"grad_norm": 4.383437156677246,
"learning_rate": 9.39329050678087e-06,
"loss": 0.3444,
"step": 900
},
{
"epoch": 1.773384763741562,
"grad_norm": 5.447354793548584,
"learning_rate": 9.107780157030694e-06,
"loss": 0.3486,
"step": 920
},
{
"epoch": 1.8119575699132113,
"grad_norm": 5.164237022399902,
"learning_rate": 8.822269807280514e-06,
"loss": 0.339,
"step": 940
},
{
"epoch": 1.8505303760848602,
"grad_norm": 4.903033256530762,
"learning_rate": 8.536759457530336e-06,
"loss": 0.3496,
"step": 960
},
{
"epoch": 1.8891031822565092,
"grad_norm": 4.962741374969482,
"learning_rate": 8.251249107780157e-06,
"loss": 0.3563,
"step": 980
},
{
"epoch": 1.9276759884281582,
"grad_norm": 4.129799842834473,
"learning_rate": 7.965738758029979e-06,
"loss": 0.3528,
"step": 1000
},
{
"epoch": 1.9276759884281582,
"eval_accuracy": 0.8261636585696838,
"eval_f1": 0.8257431354055336,
"eval_loss": 0.3805844485759735,
"eval_runtime": 106.4119,
"eval_samples_per_second": 69.25,
"eval_steps_per_second": 0.545,
"step": 1000
}
],
"logging_steps": 20,
"max_steps": 1557,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}