onegaiosu's picture
Upload Squash T5 code corruptor model
497ad22 verified
{
"best_global_step": 4180,
"best_metric": 3.6224466271050915e-07,
"best_model_checkpoint": "./code_corruptor_model_v2\\checkpoint-4180",
"epoch": 12.0,
"eval_steps": 500,
"global_step": 4560,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.13157894736842105,
"grad_norm": 0.10753314197063446,
"learning_rate": 4.948464912280702e-05,
"loss": 0.4466,
"step": 50
},
{
"epoch": 0.2631578947368421,
"grad_norm": 0.006062635686248541,
"learning_rate": 4.8936403508771935e-05,
"loss": 0.0037,
"step": 100
},
{
"epoch": 0.39473684210526316,
"grad_norm": 0.008465762250125408,
"learning_rate": 4.838815789473685e-05,
"loss": 0.001,
"step": 150
},
{
"epoch": 0.5263157894736842,
"grad_norm": 0.01755240559577942,
"learning_rate": 4.7839912280701754e-05,
"loss": 0.0015,
"step": 200
},
{
"epoch": 0.6578947368421053,
"grad_norm": 0.042610205709934235,
"learning_rate": 4.7291666666666666e-05,
"loss": 0.0009,
"step": 250
},
{
"epoch": 0.7894736842105263,
"grad_norm": 0.05834781005978584,
"learning_rate": 4.674342105263158e-05,
"loss": 0.0006,
"step": 300
},
{
"epoch": 0.9210526315789473,
"grad_norm": 0.0006559228058904409,
"learning_rate": 4.619517543859649e-05,
"loss": 0.0005,
"step": 350
},
{
"epoch": 1.0,
"eval_loss": 1.6028698155423626e-05,
"eval_runtime": 6.2183,
"eval_samples_per_second": 24.444,
"eval_steps_per_second": 12.222,
"step": 380
},
{
"epoch": 1.0526315789473684,
"grad_norm": 0.055409740656614304,
"learning_rate": 4.5646929824561405e-05,
"loss": 0.0005,
"step": 400
},
{
"epoch": 1.1842105263157894,
"grad_norm": 0.07685278356075287,
"learning_rate": 4.509868421052632e-05,
"loss": 0.0007,
"step": 450
},
{
"epoch": 1.3157894736842106,
"grad_norm": 0.0034991835709661245,
"learning_rate": 4.455043859649123e-05,
"loss": 0.0004,
"step": 500
},
{
"epoch": 1.4473684210526316,
"grad_norm": 0.0010067017283290625,
"learning_rate": 4.400219298245614e-05,
"loss": 0.0002,
"step": 550
},
{
"epoch": 1.5789473684210527,
"grad_norm": 0.0004999448428861797,
"learning_rate": 4.3453947368421056e-05,
"loss": 0.0005,
"step": 600
},
{
"epoch": 1.7105263157894737,
"grad_norm": 0.0048265825025737286,
"learning_rate": 4.290570175438597e-05,
"loss": 0.0001,
"step": 650
},
{
"epoch": 1.8421052631578947,
"grad_norm": 0.0009590413537807763,
"learning_rate": 4.235745614035088e-05,
"loss": 0.0002,
"step": 700
},
{
"epoch": 1.973684210526316,
"grad_norm": 0.0026596838142722845,
"learning_rate": 4.180921052631579e-05,
"loss": 0.0002,
"step": 750
},
{
"epoch": 2.0,
"eval_loss": 9.181250788969919e-06,
"eval_runtime": 6.0411,
"eval_samples_per_second": 25.161,
"eval_steps_per_second": 12.58,
"step": 760
},
{
"epoch": 2.1052631578947367,
"grad_norm": 0.0004917697515338659,
"learning_rate": 4.12609649122807e-05,
"loss": 0.0003,
"step": 800
},
{
"epoch": 2.236842105263158,
"grad_norm": 0.0010589464800432324,
"learning_rate": 4.071271929824562e-05,
"loss": 0.0005,
"step": 850
},
{
"epoch": 2.3684210526315788,
"grad_norm": 0.001737129525281489,
"learning_rate": 4.016447368421053e-05,
"loss": 0.0004,
"step": 900
},
{
"epoch": 2.5,
"grad_norm": 0.00043353348155505955,
"learning_rate": 3.9616228070175445e-05,
"loss": 0.0002,
"step": 950
},
{
"epoch": 2.6315789473684212,
"grad_norm": 0.000876229431014508,
"learning_rate": 3.906798245614035e-05,
"loss": 0.0001,
"step": 1000
},
{
"epoch": 2.763157894736842,
"grad_norm": 0.006859931629151106,
"learning_rate": 3.8519736842105264e-05,
"loss": 0.0,
"step": 1050
},
{
"epoch": 2.8947368421052633,
"grad_norm": 0.00029508452280424535,
"learning_rate": 3.7971491228070176e-05,
"loss": 0.0005,
"step": 1100
},
{
"epoch": 3.0,
"eval_loss": 4.4037507905159146e-05,
"eval_runtime": 6.0562,
"eval_samples_per_second": 25.098,
"eval_steps_per_second": 12.549,
"step": 1140
},
{
"epoch": 3.026315789473684,
"grad_norm": 0.02006879821419716,
"learning_rate": 3.742324561403509e-05,
"loss": 0.001,
"step": 1150
},
{
"epoch": 3.1578947368421053,
"grad_norm": 0.0015200282214209437,
"learning_rate": 3.6875e-05,
"loss": 0.0001,
"step": 1200
},
{
"epoch": 3.2894736842105265,
"grad_norm": 0.0007221988635137677,
"learning_rate": 3.6326754385964915e-05,
"loss": 0.0,
"step": 1250
},
{
"epoch": 3.4210526315789473,
"grad_norm": 0.0002022625703830272,
"learning_rate": 3.577850877192983e-05,
"loss": 0.0002,
"step": 1300
},
{
"epoch": 3.5526315789473686,
"grad_norm": 0.2107187658548355,
"learning_rate": 3.523026315789474e-05,
"loss": 0.0009,
"step": 1350
},
{
"epoch": 3.6842105263157894,
"grad_norm": 0.0004320333246141672,
"learning_rate": 3.468201754385965e-05,
"loss": 0.0009,
"step": 1400
},
{
"epoch": 3.8157894736842106,
"grad_norm": 0.0014643239555880427,
"learning_rate": 3.4133771929824566e-05,
"loss": 0.0005,
"step": 1450
},
{
"epoch": 3.9473684210526314,
"grad_norm": 0.0002134596143150702,
"learning_rate": 3.358552631578947e-05,
"loss": 0.0,
"step": 1500
},
{
"epoch": 4.0,
"eval_loss": 1.688454176473897e-05,
"eval_runtime": 6.1697,
"eval_samples_per_second": 24.637,
"eval_steps_per_second": 12.318,
"step": 1520
},
{
"epoch": 4.078947368421052,
"grad_norm": 0.007211349904537201,
"learning_rate": 3.3037280701754384e-05,
"loss": 0.0002,
"step": 1550
},
{
"epoch": 4.2105263157894735,
"grad_norm": 0.00045391780440695584,
"learning_rate": 3.24890350877193e-05,
"loss": 0.0003,
"step": 1600
},
{
"epoch": 4.342105263157895,
"grad_norm": 0.00023115136718843132,
"learning_rate": 3.194078947368421e-05,
"loss": 0.0001,
"step": 1650
},
{
"epoch": 4.473684210526316,
"grad_norm": 0.14033561944961548,
"learning_rate": 3.139254385964913e-05,
"loss": 0.0001,
"step": 1700
},
{
"epoch": 4.605263157894737,
"grad_norm": 0.0017103628488257527,
"learning_rate": 3.0844298245614035e-05,
"loss": 0.0001,
"step": 1750
},
{
"epoch": 4.7368421052631575,
"grad_norm": 0.0032462095841765404,
"learning_rate": 3.0296052631578948e-05,
"loss": 0.0,
"step": 1800
},
{
"epoch": 4.868421052631579,
"grad_norm": 0.002826864365488291,
"learning_rate": 2.974780701754386e-05,
"loss": 0.0001,
"step": 1850
},
{
"epoch": 5.0,
"grad_norm": 0.0006184170488268137,
"learning_rate": 2.9199561403508774e-05,
"loss": 0.0005,
"step": 1900
},
{
"epoch": 5.0,
"eval_loss": 3.343406660860637e-06,
"eval_runtime": 6.1148,
"eval_samples_per_second": 24.858,
"eval_steps_per_second": 12.429,
"step": 1900
},
{
"epoch": 5.131578947368421,
"grad_norm": 0.0009092154796235263,
"learning_rate": 2.8651315789473686e-05,
"loss": 0.0,
"step": 1950
},
{
"epoch": 5.2631578947368425,
"grad_norm": 0.0006162663921713829,
"learning_rate": 2.81030701754386e-05,
"loss": 0.0,
"step": 2000
},
{
"epoch": 5.394736842105263,
"grad_norm": 0.0013979279901832342,
"learning_rate": 2.755482456140351e-05,
"loss": 0.0001,
"step": 2050
},
{
"epoch": 5.526315789473684,
"grad_norm": 0.00045684297219850123,
"learning_rate": 2.700657894736842e-05,
"loss": 0.0,
"step": 2100
},
{
"epoch": 5.657894736842105,
"grad_norm": 0.017059462144970894,
"learning_rate": 2.6458333333333334e-05,
"loss": 0.0,
"step": 2150
},
{
"epoch": 5.7894736842105265,
"grad_norm": 9.451019286643714e-05,
"learning_rate": 2.591008771929825e-05,
"loss": 0.0001,
"step": 2200
},
{
"epoch": 5.921052631578947,
"grad_norm": 0.0002531503851059824,
"learning_rate": 2.5361842105263163e-05,
"loss": 0.0002,
"step": 2250
},
{
"epoch": 6.0,
"eval_loss": 8.532630317859002e-07,
"eval_runtime": 6.1846,
"eval_samples_per_second": 24.577,
"eval_steps_per_second": 12.289,
"step": 2280
},
{
"epoch": 6.052631578947368,
"grad_norm": 0.00012987718218937516,
"learning_rate": 2.4813596491228072e-05,
"loss": 0.0,
"step": 2300
},
{
"epoch": 6.184210526315789,
"grad_norm": 8.179421274689957e-05,
"learning_rate": 2.426535087719298e-05,
"loss": 0.0,
"step": 2350
},
{
"epoch": 6.315789473684211,
"grad_norm": 0.0004638760001398623,
"learning_rate": 2.3717105263157898e-05,
"loss": 0.0,
"step": 2400
},
{
"epoch": 6.447368421052632,
"grad_norm": 0.0002866844297386706,
"learning_rate": 2.316885964912281e-05,
"loss": 0.0005,
"step": 2450
},
{
"epoch": 6.578947368421053,
"grad_norm": 7.315115362871438e-05,
"learning_rate": 2.262061403508772e-05,
"loss": 0.0,
"step": 2500
},
{
"epoch": 6.7105263157894735,
"grad_norm": 0.00015406313468702137,
"learning_rate": 2.2072368421052632e-05,
"loss": 0.0,
"step": 2550
},
{
"epoch": 6.842105263157895,
"grad_norm": 8.98495563887991e-05,
"learning_rate": 2.1524122807017545e-05,
"loss": 0.0,
"step": 2600
},
{
"epoch": 6.973684210526316,
"grad_norm": 0.00020581792341545224,
"learning_rate": 2.0975877192982458e-05,
"loss": 0.0,
"step": 2650
},
{
"epoch": 7.0,
"eval_loss": 8.157680895237718e-07,
"eval_runtime": 6.1011,
"eval_samples_per_second": 24.914,
"eval_steps_per_second": 12.457,
"step": 2660
},
{
"epoch": 7.105263157894737,
"grad_norm": 0.00012650905409827828,
"learning_rate": 2.042763157894737e-05,
"loss": 0.0,
"step": 2700
},
{
"epoch": 7.2368421052631575,
"grad_norm": 0.00016898708418011665,
"learning_rate": 1.987938596491228e-05,
"loss": 0.0,
"step": 2750
},
{
"epoch": 7.368421052631579,
"grad_norm": 0.0003560652839951217,
"learning_rate": 1.9331140350877193e-05,
"loss": 0.0,
"step": 2800
},
{
"epoch": 7.5,
"grad_norm": 8.456506475340575e-05,
"learning_rate": 1.8782894736842105e-05,
"loss": 0.0,
"step": 2850
},
{
"epoch": 7.631578947368421,
"grad_norm": 0.00023642393352929503,
"learning_rate": 1.8234649122807018e-05,
"loss": 0.0,
"step": 2900
},
{
"epoch": 7.7631578947368425,
"grad_norm": 8.907222945708781e-05,
"learning_rate": 1.768640350877193e-05,
"loss": 0.0,
"step": 2950
},
{
"epoch": 7.894736842105263,
"grad_norm": 6.318661326076835e-05,
"learning_rate": 1.7138157894736844e-05,
"loss": 0.0,
"step": 3000
},
{
"epoch": 8.0,
"eval_loss": 5.999586392135825e-07,
"eval_runtime": 6.1117,
"eval_samples_per_second": 24.87,
"eval_steps_per_second": 12.435,
"step": 3040
},
{
"epoch": 8.026315789473685,
"grad_norm": 0.04527832567691803,
"learning_rate": 1.6589912280701756e-05,
"loss": 0.0002,
"step": 3050
},
{
"epoch": 8.157894736842104,
"grad_norm": 0.0002466822334099561,
"learning_rate": 1.604166666666667e-05,
"loss": 0.0,
"step": 3100
},
{
"epoch": 8.289473684210526,
"grad_norm": 0.00023732382396701723,
"learning_rate": 1.549342105263158e-05,
"loss": 0.0,
"step": 3150
},
{
"epoch": 8.421052631578947,
"grad_norm": 8.941477426560596e-05,
"learning_rate": 1.4945175438596493e-05,
"loss": 0.0001,
"step": 3200
},
{
"epoch": 8.552631578947368,
"grad_norm": 0.00011901962716365233,
"learning_rate": 1.4396929824561402e-05,
"loss": 0.0,
"step": 3250
},
{
"epoch": 8.68421052631579,
"grad_norm": 9.798636165214702e-05,
"learning_rate": 1.3848684210526317e-05,
"loss": 0.0,
"step": 3300
},
{
"epoch": 8.81578947368421,
"grad_norm": 3.7345100281527266e-05,
"learning_rate": 1.330043859649123e-05,
"loss": 0.0,
"step": 3350
},
{
"epoch": 8.947368421052632,
"grad_norm": 2.8350032152957283e-05,
"learning_rate": 1.275219298245614e-05,
"loss": 0.0,
"step": 3400
},
{
"epoch": 9.0,
"eval_loss": 4.4665341647487367e-07,
"eval_runtime": 6.1054,
"eval_samples_per_second": 24.896,
"eval_steps_per_second": 12.448,
"step": 3420
},
{
"epoch": 9.078947368421053,
"grad_norm": 8.318301843246445e-05,
"learning_rate": 1.2203947368421053e-05,
"loss": 0.0,
"step": 3450
},
{
"epoch": 9.210526315789474,
"grad_norm": 4.566019197227433e-05,
"learning_rate": 1.1655701754385966e-05,
"loss": 0.0,
"step": 3500
},
{
"epoch": 9.342105263157896,
"grad_norm": 0.03128642588853836,
"learning_rate": 1.1107456140350877e-05,
"loss": 0.0,
"step": 3550
},
{
"epoch": 9.473684210526315,
"grad_norm": 4.535232073976658e-05,
"learning_rate": 1.055921052631579e-05,
"loss": 0.0,
"step": 3600
},
{
"epoch": 9.605263157894736,
"grad_norm": 7.957038906170055e-05,
"learning_rate": 1.0010964912280703e-05,
"loss": 0.0,
"step": 3650
},
{
"epoch": 9.736842105263158,
"grad_norm": 5.397196946432814e-05,
"learning_rate": 9.462719298245615e-06,
"loss": 0.0,
"step": 3700
},
{
"epoch": 9.868421052631579,
"grad_norm": 0.00015735568013042212,
"learning_rate": 8.914473684210526e-06,
"loss": 0.0,
"step": 3750
},
{
"epoch": 10.0,
"grad_norm": 3.112037666141987e-05,
"learning_rate": 8.36622807017544e-06,
"loss": 0.0,
"step": 3800
},
{
"epoch": 10.0,
"eval_loss": 3.8959919379522034e-07,
"eval_runtime": 6.0027,
"eval_samples_per_second": 25.322,
"eval_steps_per_second": 12.661,
"step": 3800
},
{
"epoch": 10.131578947368421,
"grad_norm": 2.3204265744425356e-05,
"learning_rate": 7.81798245614035e-06,
"loss": 0.0,
"step": 3850
},
{
"epoch": 10.263157894736842,
"grad_norm": 9.128025703830644e-05,
"learning_rate": 7.269736842105264e-06,
"loss": 0.0,
"step": 3900
},
{
"epoch": 10.394736842105264,
"grad_norm": 1.550123852211982e-05,
"learning_rate": 6.721491228070176e-06,
"loss": 0.0,
"step": 3950
},
{
"epoch": 10.526315789473685,
"grad_norm": 0.00013698793190997094,
"learning_rate": 6.173245614035088e-06,
"loss": 0.0,
"step": 4000
},
{
"epoch": 10.657894736842106,
"grad_norm": 3.800446938839741e-05,
"learning_rate": 5.625e-06,
"loss": 0.0,
"step": 4050
},
{
"epoch": 10.789473684210526,
"grad_norm": 1.4189299690769985e-05,
"learning_rate": 5.076754385964912e-06,
"loss": 0.0,
"step": 4100
},
{
"epoch": 10.921052631578947,
"grad_norm": 4.4892298319609836e-05,
"learning_rate": 4.528508771929825e-06,
"loss": 0.0,
"step": 4150
},
{
"epoch": 11.0,
"eval_loss": 3.6224466271050915e-07,
"eval_runtime": 6.0832,
"eval_samples_per_second": 24.987,
"eval_steps_per_second": 12.493,
"step": 4180
},
{
"epoch": 11.052631578947368,
"grad_norm": 0.00010307016054866835,
"learning_rate": 3.980263157894737e-06,
"loss": 0.0,
"step": 4200
},
{
"epoch": 11.18421052631579,
"grad_norm": 0.0009463855531066656,
"learning_rate": 3.4320175438596496e-06,
"loss": 0.0,
"step": 4250
},
{
"epoch": 11.31578947368421,
"grad_norm": 3.068053410970606e-05,
"learning_rate": 2.8837719298245615e-06,
"loss": 0.0,
"step": 4300
},
{
"epoch": 11.447368421052632,
"grad_norm": 0.0005556969554163516,
"learning_rate": 2.335526315789474e-06,
"loss": 0.0,
"step": 4350
},
{
"epoch": 11.578947368421053,
"grad_norm": 2.4526205379515886e-05,
"learning_rate": 1.787280701754386e-06,
"loss": 0.0,
"step": 4400
},
{
"epoch": 11.710526315789474,
"grad_norm": 6.120463513070717e-05,
"learning_rate": 1.2390350877192983e-06,
"loss": 0.0,
"step": 4450
},
{
"epoch": 11.842105263157894,
"grad_norm": 0.00036876738886348903,
"learning_rate": 6.907894736842105e-07,
"loss": 0.0,
"step": 4500
},
{
"epoch": 11.973684210526315,
"grad_norm": 4.0039503801381215e-05,
"learning_rate": 1.4254385964912283e-07,
"loss": 0.0,
"step": 4550
},
{
"epoch": 12.0,
"eval_loss": 3.6446573403736693e-07,
"eval_runtime": 6.0772,
"eval_samples_per_second": 25.011,
"eval_steps_per_second": 12.506,
"step": 4560
}
],
"logging_steps": 50,
"max_steps": 4560,
"num_input_tokens_seen": 0,
"num_train_epochs": 12,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5546388467220480.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}