{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 5641,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01772735330615139,
"grad_norm": 0.04697391018271446,
"learning_rate": 4.432624113475178e-07,
"loss": 2.8098,
"step": 100
},
{
"epoch": 0.03545470661230278,
"grad_norm": 0.0493495836853981,
"learning_rate": 8.865248226950356e-07,
"loss": 2.8311,
"step": 200
},
{
"epoch": 0.05318205991845418,
"grad_norm": 0.05300679802894592,
"learning_rate": 1.3297872340425533e-06,
"loss": 2.8299,
"step": 300
},
{
"epoch": 0.07090941322460556,
"grad_norm": 0.06578100472688675,
"learning_rate": 1.7730496453900712e-06,
"loss": 2.8051,
"step": 400
},
{
"epoch": 0.08863676653075696,
"grad_norm": 0.07819516211748123,
"learning_rate": 2.2118794326241137e-06,
"loss": 2.8049,
"step": 500
},
{
"epoch": 0.10636411983690836,
"grad_norm": 0.08765570819377899,
"learning_rate": 2.6551418439716316e-06,
"loss": 2.8175,
"step": 600
},
{
"epoch": 0.12409147314305974,
"grad_norm": 0.10122698545455933,
"learning_rate": 3.098404255319149e-06,
"loss": 2.7791,
"step": 700
},
{
"epoch": 0.14181882644921112,
"grad_norm": 0.11092907190322876,
"learning_rate": 3.5416666666666673e-06,
"loss": 2.7762,
"step": 800
},
{
"epoch": 0.15954617975536253,
"grad_norm": 0.11580634117126465,
"learning_rate": 3.984929078014185e-06,
"loss": 2.7725,
"step": 900
},
{
"epoch": 0.17727353306151392,
"grad_norm": 0.13618434965610504,
"learning_rate": 4.428191489361702e-06,
"loss": 2.7711,
"step": 1000
},
{
"epoch": 0.1950008863676653,
"grad_norm": 0.13589724898338318,
"learning_rate": 4.871453900709221e-06,
"loss": 2.7544,
"step": 1100
},
{
"epoch": 0.2127282396738167,
"grad_norm": 0.1407850682735443,
"learning_rate": 5.314716312056738e-06,
"loss": 2.7616,
"step": 1200
},
{
"epoch": 0.2304555929799681,
"grad_norm": 0.16542290151119232,
"learning_rate": 5.757978723404256e-06,
"loss": 2.7719,
"step": 1300
},
{
"epoch": 0.24818294628611948,
"grad_norm": 0.16397430002689362,
"learning_rate": 6.2012411347517734e-06,
"loss": 2.7557,
"step": 1400
},
{
"epoch": 0.26591029959227086,
"grad_norm": 0.16306352615356445,
"learning_rate": 6.644503546099291e-06,
"loss": 2.742,
"step": 1500
},
{
"epoch": 0.28363765289842224,
"grad_norm": 0.18215014040470123,
"learning_rate": 7.087765957446809e-06,
"loss": 2.7483,
"step": 1600
},
{
"epoch": 0.3013650062045737,
"grad_norm": 0.16818368434906006,
"learning_rate": 7.531028368794326e-06,
"loss": 2.7402,
"step": 1700
},
{
"epoch": 0.31909235951072507,
"grad_norm": 0.17571307718753815,
"learning_rate": 7.974290780141844e-06,
"loss": 2.7278,
"step": 1800
},
{
"epoch": 0.33681971281687645,
"grad_norm": 0.18817897140979767,
"learning_rate": 8.417553191489362e-06,
"loss": 2.7383,
"step": 1900
},
{
"epoch": 0.35454706612302783,
"grad_norm": 0.2033829391002655,
"learning_rate": 8.86081560283688e-06,
"loss": 2.735,
"step": 2000
},
{
"epoch": 0.3722744194291792,
"grad_norm": 0.19735954701900482,
"learning_rate": 9.304078014184398e-06,
"loss": 2.7255,
"step": 2100
},
{
"epoch": 0.3900017727353306,
"grad_norm": 0.22808896005153656,
"learning_rate": 9.747340425531916e-06,
"loss": 2.711,
"step": 2200
},
{
"epoch": 0.407729126041482,
"grad_norm": 0.2145451158285141,
"learning_rate": 1.0190602836879434e-05,
"loss": 2.7341,
"step": 2300
},
{
"epoch": 0.4254564793476334,
"grad_norm": 0.2078903764486313,
"learning_rate": 1.0633865248226952e-05,
"loss": 2.7165,
"step": 2400
},
{
"epoch": 0.4431838326537848,
"grad_norm": 0.2379077523946762,
"learning_rate": 1.107712765957447e-05,
"loss": 2.7087,
"step": 2500
},
{
"epoch": 0.4609111859599362,
"grad_norm": 0.21793442964553833,
"learning_rate": 1.1520390070921987e-05,
"loss": 2.7173,
"step": 2600
},
{
"epoch": 0.47863853926608757,
"grad_norm": 0.2230735719203949,
"learning_rate": 1.1963652482269505e-05,
"loss": 2.7031,
"step": 2700
},
{
"epoch": 0.49636589257223895,
"grad_norm": 0.2279297262430191,
"learning_rate": 1.2406914893617021e-05,
"loss": 2.7035,
"step": 2800
},
{
"epoch": 0.5140932458783903,
"grad_norm": 0.24638701975345612,
"learning_rate": 1.285017730496454e-05,
"loss": 2.6969,
"step": 2900
},
{
"epoch": 0.5318205991845417,
"grad_norm": 0.2320908159017563,
"learning_rate": 1.3293439716312057e-05,
"loss": 2.7033,
"step": 3000
},
{
"epoch": 0.5495479524906931,
"grad_norm": 0.23391854763031006,
"learning_rate": 1.3736702127659575e-05,
"loss": 2.6866,
"step": 3100
},
{
"epoch": 0.5672753057968445,
"grad_norm": 0.2577739357948303,
"learning_rate": 1.4179964539007095e-05,
"loss": 2.6941,
"step": 3200
},
{
"epoch": 0.5850026591029959,
"grad_norm": 0.24674548208713531,
"learning_rate": 1.4623226950354613e-05,
"loss": 2.6943,
"step": 3300
},
{
"epoch": 0.6027300124091474,
"grad_norm": 0.276777982711792,
"learning_rate": 1.5066489361702127e-05,
"loss": 2.6852,
"step": 3400
},
{
"epoch": 0.6204573657152987,
"grad_norm": 0.2453552931547165,
"learning_rate": 1.5509751773049645e-05,
"loss": 2.6822,
"step": 3500
},
{
"epoch": 0.6381847190214501,
"grad_norm": 0.2694833278656006,
"learning_rate": 1.5953014184397165e-05,
"loss": 2.6817,
"step": 3600
},
{
"epoch": 0.6559120723276015,
"grad_norm": 0.2562524080276489,
"learning_rate": 1.639627659574468e-05,
"loss": 2.6801,
"step": 3700
},
{
"epoch": 0.6736394256337529,
"grad_norm": 0.24736031889915466,
"learning_rate": 1.68395390070922e-05,
"loss": 2.6694,
"step": 3800
},
{
"epoch": 0.6913667789399043,
"grad_norm": 0.25903522968292236,
"learning_rate": 1.728280141843972e-05,
"loss": 2.6682,
"step": 3900
},
{
"epoch": 0.7090941322460557,
"grad_norm": 0.2558101713657379,
"learning_rate": 1.7726063829787233e-05,
"loss": 2.664,
"step": 4000
},
{
"epoch": 0.726821485552207,
"grad_norm": 0.25806924700737,
"learning_rate": 1.8169326241134752e-05,
"loss": 2.6725,
"step": 4100
},
{
"epoch": 0.7445488388583584,
"grad_norm": 0.26673123240470886,
"learning_rate": 1.8612588652482272e-05,
"loss": 2.6739,
"step": 4200
},
{
"epoch": 0.7622761921645098,
"grad_norm": 0.2724100947380066,
"learning_rate": 1.9055851063829788e-05,
"loss": 2.6706,
"step": 4300
},
{
"epoch": 0.7800035454706612,
"grad_norm": 0.2800888121128082,
"learning_rate": 1.9499113475177308e-05,
"loss": 2.6668,
"step": 4400
},
{
"epoch": 0.7977308987768126,
"grad_norm": 0.2775346338748932,
"learning_rate": 1.9942375886524824e-05,
"loss": 2.6652,
"step": 4500
},
{
"epoch": 0.815458252082964,
"grad_norm": 0.282071053981781,
"learning_rate": 1.9708392780483382e-05,
"loss": 2.6562,
"step": 4600
},
{
"epoch": 0.8331856053891155,
"grad_norm": 0.2759540379047394,
"learning_rate": 1.8676440399345303e-05,
"loss": 2.6686,
"step": 4700
},
{
"epoch": 0.8509129586952668,
"grad_norm": 0.27361398935317993,
"learning_rate": 1.6976990989182965e-05,
"loss": 2.6674,
"step": 4800
},
{
"epoch": 0.8686403120014182,
"grad_norm": 0.2933979332447052,
"learning_rate": 1.4740786810652702e-05,
"loss": 2.653,
"step": 4900
},
{
"epoch": 0.8863676653075696,
"grad_norm": 0.277789443731308,
"learning_rate": 1.2139863811304302e-05,
"loss": 2.6543,
"step": 5000
},
{
"epoch": 0.904095018613721,
"grad_norm": 0.2968071699142456,
"learning_rate": 9.374316533931709e-06,
"loss": 2.6589,
"step": 5100
},
{
"epoch": 0.9218223719198724,
"grad_norm": 0.2793715000152588,
"learning_rate": 6.656904417464698e-06,
"loss": 2.6495,
"step": 5200
},
{
"epoch": 0.9395497252260238,
"grad_norm": 0.27239373326301575,
"learning_rate": 4.196683760441683e-06,
"loss": 2.6603,
"step": 5300
},
{
"epoch": 0.9572770785321751,
"grad_norm": 0.2786346673965454,
"learning_rate": 2.1829245749555094e-06,
"loss": 2.6586,
"step": 5400
},
{
"epoch": 0.9750044318383265,
"grad_norm": 0.27085384726524353,
"learning_rate": 7.705496417336055e-07,
"loss": 2.6533,
"step": 5500
},
{
"epoch": 0.9927317851444779,
"grad_norm": 0.29114964604377747,
"learning_rate": 6.821597084026966e-08,
"loss": 2.6639,
"step": 5600
},
{
"epoch": 1.0,
"step": 5641,
"total_flos": 8.219951597501809e+17,
"train_loss": 2.7133784883306755,
"train_runtime": 1594.1832,
"train_samples_per_second": 56.61,
"train_steps_per_second": 3.538
}
],
"logging_steps": 100,
"max_steps": 5641,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.219951597501809e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}