srishtirai's picture
Upload folder using huggingface_hub
c031076 verified
{
"best_metric": 0.03272353485226631,
"best_model_checkpoint": "./codellama_sql_model_forestry/checkpoint-400",
"epoch": 2.5006257822277846,
"eval_steps": 100,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05006257822277847,
"grad_norm": 7.185255527496338,
"learning_rate": 0.0001979899497487437,
"loss": 32.456,
"step": 10
},
{
"epoch": 0.10012515644555695,
"grad_norm": 0.1251622438430786,
"learning_rate": 0.0001946398659966499,
"loss": 0.4541,
"step": 20
},
{
"epoch": 0.15018773466833543,
"grad_norm": 0.20880717039108276,
"learning_rate": 0.0001912897822445561,
"loss": 0.3029,
"step": 30
},
{
"epoch": 0.2002503128911139,
"grad_norm": 0.16143666207790375,
"learning_rate": 0.0001879396984924623,
"loss": 0.2429,
"step": 40
},
{
"epoch": 0.2503128911138924,
"grad_norm": 0.13474377989768982,
"learning_rate": 0.0001845896147403685,
"loss": 0.2332,
"step": 50
},
{
"epoch": 0.30037546933667086,
"grad_norm": 0.17662972211837769,
"learning_rate": 0.0001812395309882747,
"loss": 0.2085,
"step": 60
},
{
"epoch": 0.3504380475594493,
"grad_norm": 0.17377126216888428,
"learning_rate": 0.0001778894472361809,
"loss": 0.1961,
"step": 70
},
{
"epoch": 0.4005006257822278,
"grad_norm": 0.1938253790140152,
"learning_rate": 0.0001745393634840871,
"loss": 0.2032,
"step": 80
},
{
"epoch": 0.45056320400500627,
"grad_norm": 0.1416705697774887,
"learning_rate": 0.0001711892797319933,
"loss": 0.198,
"step": 90
},
{
"epoch": 0.5006257822277848,
"grad_norm": 0.16024866700172424,
"learning_rate": 0.0001678391959798995,
"loss": 0.185,
"step": 100
},
{
"epoch": 0.5006257822277848,
"eval_loss": 0.042159534990787506,
"eval_runtime": 125.0369,
"eval_samples_per_second": 0.344,
"eval_steps_per_second": 0.344,
"step": 100
},
{
"epoch": 0.5506883604505632,
"grad_norm": 0.1869831383228302,
"learning_rate": 0.0001644891122278057,
"loss": 0.1921,
"step": 110
},
{
"epoch": 0.6007509386733417,
"grad_norm": 0.15546876192092896,
"learning_rate": 0.0001611390284757119,
"loss": 0.1741,
"step": 120
},
{
"epoch": 0.6508135168961201,
"grad_norm": 0.1712736338376999,
"learning_rate": 0.0001577889447236181,
"loss": 0.174,
"step": 130
},
{
"epoch": 0.7008760951188986,
"grad_norm": 0.1676545888185501,
"learning_rate": 0.0001544388609715243,
"loss": 0.1571,
"step": 140
},
{
"epoch": 0.7509386733416771,
"grad_norm": 0.17751628160476685,
"learning_rate": 0.00015108877721943048,
"loss": 0.1778,
"step": 150
},
{
"epoch": 0.8010012515644556,
"grad_norm": 0.1387346237897873,
"learning_rate": 0.00014773869346733668,
"loss": 0.1532,
"step": 160
},
{
"epoch": 0.851063829787234,
"grad_norm": 0.17603643238544464,
"learning_rate": 0.00014438860971524288,
"loss": 0.1577,
"step": 170
},
{
"epoch": 0.9011264080100125,
"grad_norm": 0.1522763967514038,
"learning_rate": 0.00014103852596314908,
"loss": 0.1501,
"step": 180
},
{
"epoch": 0.951188986232791,
"grad_norm": 0.14780306816101074,
"learning_rate": 0.00013768844221105528,
"loss": 0.1499,
"step": 190
},
{
"epoch": 1.0,
"grad_norm": 0.11175049841403961,
"learning_rate": 0.00013433835845896147,
"loss": 0.1509,
"step": 200
},
{
"epoch": 1.0,
"eval_loss": 0.037981580942869186,
"eval_runtime": 125.2439,
"eval_samples_per_second": 0.343,
"eval_steps_per_second": 0.343,
"step": 200
},
{
"epoch": 1.0500625782227784,
"grad_norm": 0.15822191536426544,
"learning_rate": 0.00013098827470686767,
"loss": 0.1307,
"step": 210
},
{
"epoch": 1.100125156445557,
"grad_norm": 0.19546131789684296,
"learning_rate": 0.00012763819095477387,
"loss": 0.1189,
"step": 220
},
{
"epoch": 1.1501877346683353,
"grad_norm": 0.17685039341449738,
"learning_rate": 0.00012428810720268007,
"loss": 0.1296,
"step": 230
},
{
"epoch": 1.200250312891114,
"grad_norm": 0.17825502157211304,
"learning_rate": 0.00012093802345058627,
"loss": 0.1291,
"step": 240
},
{
"epoch": 1.2503128911138923,
"grad_norm": 0.19515497982501984,
"learning_rate": 0.00011758793969849247,
"loss": 0.1283,
"step": 250
},
{
"epoch": 1.300375469336671,
"grad_norm": 0.22185975313186646,
"learning_rate": 0.00011423785594639866,
"loss": 0.1358,
"step": 260
},
{
"epoch": 1.3504380475594493,
"grad_norm": 0.18015995621681213,
"learning_rate": 0.00011088777219430486,
"loss": 0.1284,
"step": 270
},
{
"epoch": 1.400500625782228,
"grad_norm": 0.2559189796447754,
"learning_rate": 0.00010753768844221106,
"loss": 0.1188,
"step": 280
},
{
"epoch": 1.4505632040050063,
"grad_norm": 0.17111122608184814,
"learning_rate": 0.00010418760469011726,
"loss": 0.1243,
"step": 290
},
{
"epoch": 1.5006257822277846,
"grad_norm": 0.19879887998104095,
"learning_rate": 0.00010083752093802346,
"loss": 0.1364,
"step": 300
},
{
"epoch": 1.5006257822277846,
"eval_loss": 0.03517143055796623,
"eval_runtime": 124.8471,
"eval_samples_per_second": 0.344,
"eval_steps_per_second": 0.344,
"step": 300
},
{
"epoch": 1.5506883604505632,
"grad_norm": 0.21239443123340607,
"learning_rate": 9.748743718592965e-05,
"loss": 0.1198,
"step": 310
},
{
"epoch": 1.6007509386733418,
"grad_norm": 0.21676813066005707,
"learning_rate": 9.413735343383585e-05,
"loss": 0.1309,
"step": 320
},
{
"epoch": 1.65081351689612,
"grad_norm": 0.2111431062221527,
"learning_rate": 9.078726968174205e-05,
"loss": 0.123,
"step": 330
},
{
"epoch": 1.7008760951188986,
"grad_norm": 0.17868830263614655,
"learning_rate": 8.743718592964825e-05,
"loss": 0.1133,
"step": 340
},
{
"epoch": 1.7509386733416772,
"grad_norm": 0.20656318962574005,
"learning_rate": 8.408710217755445e-05,
"loss": 0.119,
"step": 350
},
{
"epoch": 1.8010012515644556,
"grad_norm": 0.25555238127708435,
"learning_rate": 8.073701842546064e-05,
"loss": 0.1142,
"step": 360
},
{
"epoch": 1.851063829787234,
"grad_norm": 0.17207714915275574,
"learning_rate": 7.738693467336684e-05,
"loss": 0.1085,
"step": 370
},
{
"epoch": 1.9011264080100125,
"grad_norm": 0.16670793294906616,
"learning_rate": 7.403685092127304e-05,
"loss": 0.11,
"step": 380
},
{
"epoch": 1.9511889862327911,
"grad_norm": 0.20777879655361176,
"learning_rate": 7.068676716917924e-05,
"loss": 0.1094,
"step": 390
},
{
"epoch": 2.0,
"grad_norm": 0.20488321781158447,
"learning_rate": 6.733668341708544e-05,
"loss": 0.1182,
"step": 400
},
{
"epoch": 2.0,
"eval_loss": 0.03272353485226631,
"eval_runtime": 125.3359,
"eval_samples_per_second": 0.343,
"eval_steps_per_second": 0.343,
"step": 400
},
{
"epoch": 2.0500625782227786,
"grad_norm": 0.19323211908340454,
"learning_rate": 6.398659966499163e-05,
"loss": 0.0897,
"step": 410
},
{
"epoch": 2.1001251564455568,
"grad_norm": 0.21595624089241028,
"learning_rate": 6.063651591289783e-05,
"loss": 0.0761,
"step": 420
},
{
"epoch": 2.1501877346683353,
"grad_norm": 0.28912344574928284,
"learning_rate": 5.728643216080403e-05,
"loss": 0.0819,
"step": 430
},
{
"epoch": 2.200250312891114,
"grad_norm": 0.21744051575660706,
"learning_rate": 5.393634840871022e-05,
"loss": 0.0793,
"step": 440
},
{
"epoch": 2.2503128911138925,
"grad_norm": 0.2404051125049591,
"learning_rate": 5.058626465661642e-05,
"loss": 0.0793,
"step": 450
},
{
"epoch": 2.3003754693366707,
"grad_norm": 0.2571396827697754,
"learning_rate": 4.723618090452262e-05,
"loss": 0.0841,
"step": 460
},
{
"epoch": 2.3504380475594493,
"grad_norm": 0.19428133964538574,
"learning_rate": 4.3886097152428815e-05,
"loss": 0.0795,
"step": 470
},
{
"epoch": 2.400500625782228,
"grad_norm": 0.2305491715669632,
"learning_rate": 4.053601340033501e-05,
"loss": 0.0835,
"step": 480
},
{
"epoch": 2.4505632040050065,
"grad_norm": 0.25262993574142456,
"learning_rate": 3.7185929648241204e-05,
"loss": 0.0803,
"step": 490
},
{
"epoch": 2.5006257822277846,
"grad_norm": 0.22605903446674347,
"learning_rate": 3.38358458961474e-05,
"loss": 0.0852,
"step": 500
},
{
"epoch": 2.5006257822277846,
"eval_loss": 0.03292727842926979,
"eval_runtime": 125.3579,
"eval_samples_per_second": 0.343,
"eval_steps_per_second": 0.343,
"step": 500
}
],
"logging_steps": 10,
"max_steps": 597,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.632027949274235e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}