modrill's picture
Add files using upload-large-folder tool
ebbe3fd verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 766,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.013061224489795919,
"grad_norm": 0.013593924231827259,
"learning_rate": 1.8461538461538465e-05,
"loss": 1.0348053932189942,
"step": 10
},
{
"epoch": 0.026122448979591838,
"grad_norm": 0.010592319071292877,
"learning_rate": 3.8974358974358976e-05,
"loss": 0.9912956237792969,
"step": 20
},
{
"epoch": 0.03918367346938775,
"grad_norm": 0.008065517991781235,
"learning_rate": 5.9487179487179495e-05,
"loss": 0.9145261764526367,
"step": 30
},
{
"epoch": 0.052244897959183675,
"grad_norm": 0.006928236689418554,
"learning_rate": 8e-05,
"loss": 0.8690940856933593,
"step": 40
},
{
"epoch": 0.0653061224489796,
"grad_norm": 0.0065370709635317326,
"learning_rate": 7.996265836446255e-05,
"loss": 0.8447072982788086,
"step": 50
},
{
"epoch": 0.0783673469387755,
"grad_norm": 0.005765452049672604,
"learning_rate": 7.985070317773737e-05,
"loss": 0.8226842880249023,
"step": 60
},
{
"epoch": 0.09142857142857143,
"grad_norm": 0.004979082383215427,
"learning_rate": 7.966434346931348e-05,
"loss": 0.8047774314880372,
"step": 70
},
{
"epoch": 0.10448979591836735,
"grad_norm": 0.00686669023707509,
"learning_rate": 7.940392718800637e-05,
"loss": 0.7929642200469971,
"step": 80
},
{
"epoch": 0.11755102040816326,
"grad_norm": 0.006248346995562315,
"learning_rate": 7.90699405523093e-05,
"loss": 0.7915477275848388,
"step": 90
},
{
"epoch": 0.1306122448979592,
"grad_norm": 0.007850440219044685,
"learning_rate": 7.86630071425835e-05,
"loss": 0.7851225376129151,
"step": 100
},
{
"epoch": 0.1436734693877551,
"grad_norm": 0.007440537214279175,
"learning_rate": 7.818388673678265e-05,
"loss": 0.7773007869720459,
"step": 110
},
{
"epoch": 0.156734693877551,
"grad_norm": 0.007219385821372271,
"learning_rate": 7.763347389188538e-05,
"loss": 0.7723363399505615,
"step": 120
},
{
"epoch": 0.16979591836734695,
"grad_norm": 0.006354185286909342,
"learning_rate": 7.701279627368412e-05,
"loss": 0.7682206153869628,
"step": 130
},
{
"epoch": 0.18285714285714286,
"grad_norm": 0.006955909077078104,
"learning_rate": 7.632301273804914e-05,
"loss": 0.7699796676635742,
"step": 140
},
{
"epoch": 0.19591836734693877,
"grad_norm": 0.008277718909084797,
"learning_rate": 7.556541116724981e-05,
"loss": 0.764019775390625,
"step": 150
},
{
"epoch": 0.2089795918367347,
"grad_norm": 0.007425008807331324,
"learning_rate": 7.474140606537311e-05,
"loss": 0.7628626823425293,
"step": 160
},
{
"epoch": 0.2220408163265306,
"grad_norm": 0.008092896081507206,
"learning_rate": 7.38525359173288e-05,
"loss": 0.7607949256896973,
"step": 170
},
{
"epoch": 0.23510204081632652,
"grad_norm": 0.011249990202486515,
"learning_rate": 7.290046031637228e-05,
"loss": 0.757351303100586,
"step": 180
},
{
"epoch": 0.24816326530612245,
"grad_norm": 0.009156743064522743,
"learning_rate": 7.188695686550836e-05,
"loss": 0.7565219879150391,
"step": 190
},
{
"epoch": 0.2612244897959184,
"grad_norm": 0.008516875095665455,
"learning_rate": 7.081391785856087e-05,
"loss": 0.7473669052124023,
"step": 200
},
{
"epoch": 0.2742857142857143,
"grad_norm": 0.008919311687350273,
"learning_rate": 6.96833467471056e-05,
"loss": 0.7444419860839844,
"step": 210
},
{
"epoch": 0.2873469387755102,
"grad_norm": 0.007335508707910776,
"learning_rate": 6.84973543998622e-05,
"loss": 0.7573845386505127,
"step": 220
},
{
"epoch": 0.3004081632653061,
"grad_norm": 0.007334326393902302,
"learning_rate": 6.725815516152973e-05,
"loss": 0.7524682998657226,
"step": 230
},
{
"epoch": 0.313469387755102,
"grad_norm": 0.006889250595122576,
"learning_rate": 6.596806271842398e-05,
"loss": 0.7463503837585449,
"step": 240
},
{
"epoch": 0.32653061224489793,
"grad_norm": 0.007522549480199814,
"learning_rate": 6.462948577863593e-05,
"loss": 0.7468090057373047,
"step": 250
},
{
"epoch": 0.3395918367346939,
"grad_norm": 0.00689704529941082,
"learning_rate": 6.324492357477686e-05,
"loss": 0.745818042755127,
"step": 260
},
{
"epoch": 0.3526530612244898,
"grad_norm": 0.007402004674077034,
"learning_rate": 6.18169611977065e-05,
"loss": 0.737040901184082,
"step": 270
},
{
"epoch": 0.3657142857142857,
"grad_norm": 0.006718257907778025,
"learning_rate": 6.034826476995715e-05,
"loss": 0.7412730693817139,
"step": 280
},
{
"epoch": 0.3787755102040816,
"grad_norm": 0.006786980666220188,
"learning_rate": 5.8841576467864825e-05,
"loss": 0.7408377170562744,
"step": 290
},
{
"epoch": 0.39183673469387753,
"grad_norm": 0.006618338171392679,
"learning_rate": 5.7299709401701805e-05,
"loss": 0.7430388927459717,
"step": 300
},
{
"epoch": 0.4048979591836735,
"grad_norm": 0.007314841262996197,
"learning_rate": 5.572554236336965e-05,
"loss": 0.7401338577270508,
"step": 310
},
{
"epoch": 0.4179591836734694,
"grad_norm": 0.0067168474197387695,
"learning_rate": 5.4122014451459385e-05,
"loss": 0.7423385143280029,
"step": 320
},
{
"epoch": 0.4310204081632653,
"grad_norm": 0.007062443997710943,
"learning_rate": 5.2492119583714064e-05,
"loss": 0.7407833099365234,
"step": 330
},
{
"epoch": 0.4440816326530612,
"grad_norm": 0.009489820338785648,
"learning_rate": 5.083890090713949e-05,
"loss": 0.7376296997070313,
"step": 340
},
{
"epoch": 0.45714285714285713,
"grad_norm": 0.006291185040026903,
"learning_rate": 4.916544511619984e-05,
"loss": 0.7393476963043213,
"step": 350
},
{
"epoch": 0.47020408163265304,
"grad_norm": 0.007340357638895512,
"learning_rate": 4.747487668970681e-05,
"loss": 0.7434526443481445,
"step": 360
},
{
"epoch": 0.483265306122449,
"grad_norm": 0.007148618344217539,
"learning_rate": 4.5770352057162046e-05,
"loss": 0.7274169445037841,
"step": 370
},
{
"epoch": 0.4963265306122449,
"grad_norm": 0.007421489339321852,
"learning_rate": 4.405505370544521e-05,
"loss": 0.7373303413391114,
"step": 380
},
{
"epoch": 0.5093877551020408,
"grad_norm": 0.006447239778935909,
"learning_rate": 4.233218423685071e-05,
"loss": 0.7334803581237793,
"step": 390
},
{
"epoch": 0.5224489795918368,
"grad_norm": 0.007035430055111647,
"learning_rate": 4.060496038956728e-05,
"loss": 0.7342597961425781,
"step": 400
},
{
"epoch": 0.5355102040816326,
"grad_norm": 0.006574620492756367,
"learning_rate": 3.887660703176474e-05,
"loss": 0.7356997966766358,
"step": 410
},
{
"epoch": 0.5485714285714286,
"grad_norm": 0.0066298553720116615,
"learning_rate": 3.7150351140501457e-05,
"loss": 0.7381177425384522,
"step": 420
},
{
"epoch": 0.5616326530612245,
"grad_norm": 0.007492161355912685,
"learning_rate": 3.542941577669424e-05,
"loss": 0.7291494369506836,
"step": 430
},
{
"epoch": 0.5746938775510204,
"grad_norm": 0.006217462942004204,
"learning_rate": 3.3717014067400025e-05,
"loss": 0.7233750343322753,
"step": 440
},
{
"epoch": 0.5877551020408164,
"grad_norm": 0.006911132019013166,
"learning_rate": 3.201634320664491e-05,
"loss": 0.7269360542297363,
"step": 450
},
{
"epoch": 0.6008163265306122,
"grad_norm": 0.00729888491332531,
"learning_rate": 3.0330578486001478e-05,
"loss": 0.7219826698303222,
"step": 460
},
{
"epoch": 0.6138775510204082,
"grad_norm": 0.007913697510957718,
"learning_rate": 2.8662867366059758e-05,
"loss": 0.7285231590270996,
"step": 470
},
{
"epoch": 0.626938775510204,
"grad_norm": 0.006929247174412012,
"learning_rate": 2.7016323599860833e-05,
"loss": 0.7285576820373535,
"step": 480
},
{
"epoch": 0.64,
"grad_norm": 0.006759402342140675,
"learning_rate": 2.5394021419265458e-05,
"loss": 0.7239264965057373,
"step": 490
},
{
"epoch": 0.6530612244897959,
"grad_norm": 0.006527756340801716,
"learning_rate": 2.379898979511156e-05,
"loss": 0.731016731262207,
"step": 500
},
{
"epoch": 0.6661224489795918,
"grad_norm": 0.006792586762458086,
"learning_rate": 2.2234206781878127e-05,
"loss": 0.7236400604248047,
"step": 510
},
{
"epoch": 0.6791836734693878,
"grad_norm": 0.006418135017156601,
"learning_rate": 2.0702593957413973e-05,
"loss": 0.7233975887298584,
"step": 520
},
{
"epoch": 0.6922448979591836,
"grad_norm": 0.006609582342207432,
"learning_rate": 1.9207010968112856e-05,
"loss": 0.7252939224243165,
"step": 530
},
{
"epoch": 0.7053061224489796,
"grad_norm": 0.006350552197545767,
"learning_rate": 1.7750250189719885e-05,
"loss": 0.7322314739227295,
"step": 540
},
{
"epoch": 0.7183673469387755,
"grad_norm": 0.006282226648181677,
"learning_rate": 1.633503151373769e-05,
"loss": 0.718090009689331,
"step": 550
},
{
"epoch": 0.7314285714285714,
"grad_norm": 0.006717463489621878,
"learning_rate": 1.4963997269166472e-05,
"loss": 0.7251626491546631,
"step": 560
},
{
"epoch": 0.7444897959183674,
"grad_norm": 0.006243122275918722,
"learning_rate": 1.363970728905975e-05,
"loss": 0.7236129760742187,
"step": 570
},
{
"epoch": 0.7575510204081632,
"grad_norm": 0.005929226521402597,
"learning_rate": 1.2364634131106663e-05,
"loss": 0.7275202751159668,
"step": 580
},
{
"epoch": 0.7706122448979592,
"grad_norm": 0.006825726944953203,
"learning_rate": 1.11411584611646e-05,
"loss": 0.7266074657440186,
"step": 590
},
{
"epoch": 0.7836734693877551,
"grad_norm": 0.006338095758110285,
"learning_rate": 9.971564608361387e-06,
"loss": 0.7227339744567871,
"step": 600
},
{
"epoch": 0.796734693877551,
"grad_norm": 0.006189221516251564,
"learning_rate": 8.858036300065912e-06,
"loss": 0.7260101318359375,
"step": 610
},
{
"epoch": 0.809795918367347,
"grad_norm": 0.006516415625810623,
"learning_rate": 7.802652584690627e-06,
"loss": 0.721678638458252,
"step": 620
},
{
"epoch": 0.8228571428571428,
"grad_norm": 0.006069981027394533,
"learning_rate": 6.807383949938131e-06,
"loss": 0.7275302886962891,
"step": 630
},
{
"epoch": 0.8359183673469388,
"grad_norm": 0.006245093885809183,
"learning_rate": 5.874088643739453e-06,
"loss": 0.7232262134552002,
"step": 640
},
{
"epoch": 0.8489795918367347,
"grad_norm": 0.0058577232994139194,
"learning_rate": 5.0045092047532385e-06,
"loss": 0.7290368556976319,
"step": 650
},
{
"epoch": 0.8620408163265306,
"grad_norm": 0.005565746687352657,
"learning_rate": 4.200269208903569e-06,
"loss": 0.7283772945404052,
"step": 660
},
{
"epoch": 0.8751020408163265,
"grad_norm": 0.006886645220220089,
"learning_rate": 3.4628702380309266e-06,
"loss": 0.7242953300476074,
"step": 670
},
{
"epoch": 0.8881632653061224,
"grad_norm": 0.005979357752948999,
"learning_rate": 2.793689076316111e-06,
"loss": 0.7234395503997803,
"step": 680
},
{
"epoch": 0.9012244897959184,
"grad_norm": 0.005459806881844997,
"learning_rate": 2.193975139711575e-06,
"loss": 0.7297232151031494,
"step": 690
},
{
"epoch": 0.9142857142857143,
"grad_norm": 0.00546460272744298,
"learning_rate": 1.6648481431797137e-06,
"loss": 0.7135615348815918,
"step": 700
},
{
"epoch": 0.9273469387755102,
"grad_norm": 0.005548370536416769,
"learning_rate": 1.2072960100933862e-06,
"loss": 0.7257327079772949,
"step": 710
},
{
"epoch": 0.9404081632653061,
"grad_norm": 0.005945554003119469,
"learning_rate": 8.221730277022488e-07,
"loss": 0.7289669036865234,
"step": 720
},
{
"epoch": 0.953469387755102,
"grad_norm": 0.005690570455044508,
"learning_rate": 5.101982521085847e-07,
"loss": 0.7285196781158447,
"step": 730
},
{
"epoch": 0.966530612244898,
"grad_norm": 0.005644120275974274,
"learning_rate": 2.719541657307456e-07,
"loss": 0.7271464347839356,
"step": 740
},
{
"epoch": 0.9795918367346939,
"grad_norm": 0.005760515108704567,
"learning_rate": 1.0788558976085662e-07,
"loss": 0.726295280456543,
"step": 750
},
{
"epoch": 0.9926530612244898,
"grad_norm": 0.006554395891726017,
"learning_rate": 1.8298853647267245e-08,
"loss": 0.7278533458709717,
"step": 760
},
{
"epoch": 1.0,
"step": 766,
"total_flos": 7.613569370341507e+18,
"train_loss": 0.7527124293479223,
"train_runtime": 22851.6673,
"train_samples_per_second": 2.144,
"train_steps_per_second": 0.034
}
],
"logging_steps": 10,
"max_steps": 766,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.613569370341507e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}