{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.89875173370319,
"eval_steps": 500,
"global_step": 900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11095700416088766,
"grad_norm": 0.31662145256996155,
"learning_rate": 7.407407407407407e-05,
"loss": 0.5604,
"step": 10
},
{
"epoch": 0.22191400832177532,
"grad_norm": 0.38665255904197693,
"learning_rate": 0.00014814814814814815,
"loss": 0.3448,
"step": 20
},
{
"epoch": 0.332871012482663,
"grad_norm": 0.38282278180122375,
"learning_rate": 0.00019999417253661235,
"loss": 0.1345,
"step": 30
},
{
"epoch": 0.44382801664355065,
"grad_norm": 0.33959391713142395,
"learning_rate": 0.000199890592080658,
"loss": 0.1206,
"step": 40
},
{
"epoch": 0.5547850208044383,
"grad_norm": 0.2943621873855591,
"learning_rate": 0.00019965766682369186,
"loss": 0.1234,
"step": 50
},
{
"epoch": 0.665742024965326,
"grad_norm": 0.25359126925468445,
"learning_rate": 0.00019929569837240564,
"loss": 0.1039,
"step": 60
},
{
"epoch": 0.7766990291262136,
"grad_norm": 0.23930878937244415,
"learning_rate": 0.0001988051554269675,
"loss": 0.102,
"step": 70
},
{
"epoch": 0.8876560332871013,
"grad_norm": 0.2013150006532669,
"learning_rate": 0.00019818667317411865,
"loss": 0.0974,
"step": 80
},
{
"epoch": 0.9986130374479889,
"grad_norm": 0.25096118450164795,
"learning_rate": 0.00019744105246469263,
"loss": 0.099,
"step": 90
},
{
"epoch": 1.0998613037447988,
"grad_norm": 0.25178226828575134,
"learning_rate": 0.0001965692587766216,
"loss": 0.0714,
"step": 100
},
{
"epoch": 1.2108183079056865,
"grad_norm": 0.2704208195209503,
"learning_rate": 0.00019557242096477327,
"loss": 0.0771,
"step": 110
},
{
"epoch": 1.3217753120665743,
"grad_norm": 0.22107760608196259,
"learning_rate": 0.00019445182979923654,
"loss": 0.0703,
"step": 120
},
{
"epoch": 1.4327323162274619,
"grad_norm": 0.26953792572021484,
"learning_rate": 0.00019320893629394873,
"loss": 0.0753,
"step": 130
},
{
"epoch": 1.5436893203883495,
"grad_norm": 0.2142401486635208,
"learning_rate": 0.00019184534982782904,
"loss": 0.0724,
"step": 140
},
{
"epoch": 1.6546463245492373,
"grad_norm": 0.25699618458747864,
"learning_rate": 0.00019036283606085053,
"loss": 0.0648,
"step": 150
},
{
"epoch": 1.765603328710125,
"grad_norm": 0.2224379926919937,
"learning_rate": 0.00018876331464774945,
"loss": 0.0706,
"step": 160
},
{
"epoch": 1.8765603328710125,
"grad_norm": 0.23435620963573456,
"learning_rate": 0.0001870488567523318,
"loss": 0.0695,
"step": 170
},
{
"epoch": 1.9875173370319001,
"grad_norm": 0.18676415085792542,
"learning_rate": 0.00018522168236559695,
"loss": 0.0615,
"step": 180
},
{
"epoch": 2.08876560332871,
"grad_norm": 0.24162153899669647,
"learning_rate": 0.00018328415743114912,
"loss": 0.0445,
"step": 190
},
{
"epoch": 2.1997226074895977,
"grad_norm": 0.3869277536869049,
"learning_rate": 0.00018123879078162097,
"loss": 0.0502,
"step": 200
},
{
"epoch": 2.3106796116504853,
"grad_norm": 0.3037394881248474,
"learning_rate": 0.00017908823089007457,
"loss": 0.0482,
"step": 210
},
{
"epoch": 2.421636615811373,
"grad_norm": 0.18976379930973053,
"learning_rate": 0.00017683526244058716,
"loss": 0.0528,
"step": 220
},
{
"epoch": 2.5325936199722605,
"grad_norm": 0.30705705285072327,
"learning_rate": 0.00017448280272246212,
"loss": 0.0521,
"step": 230
},
{
"epoch": 2.6435506241331486,
"grad_norm": 0.21610881388187408,
"learning_rate": 0.000172033897852734,
"loss": 0.0535,
"step": 240
},
{
"epoch": 2.754507628294036,
"grad_norm": 0.18693220615386963,
"learning_rate": 0.00016949171883185918,
"loss": 0.0517,
"step": 250
},
{
"epoch": 2.8654646324549238,
"grad_norm": 0.3321268558502197,
"learning_rate": 0.0001668595574376992,
"loss": 0.0407,
"step": 260
},
{
"epoch": 2.9764216366158114,
"grad_norm": 0.20721495151519775,
"learning_rate": 0.000164140821963114,
"loss": 0.0417,
"step": 270
},
{
"epoch": 3.0776699029126213,
"grad_norm": 0.20151656866073608,
"learning_rate": 0.00016133903280268362,
"loss": 0.0373,
"step": 280
},
{
"epoch": 3.188626907073509,
"grad_norm": 0.3590203821659088,
"learning_rate": 0.00015845781789427377,
"loss": 0.0358,
"step": 290
},
{
"epoch": 3.2995839112343965,
"grad_norm": 0.20630675554275513,
"learning_rate": 0.000155500908021347,
"loss": 0.0299,
"step": 300
},
{
"epoch": 3.410540915395284,
"grad_norm": 0.3287246525287628,
"learning_rate": 0.000152472131982103,
"loss": 0.0331,
"step": 310
},
{
"epoch": 3.5214979195561718,
"grad_norm": 0.24394913017749786,
"learning_rate": 0.0001493754116317029,
"loss": 0.0368,
"step": 320
},
{
"epoch": 3.63245492371706,
"grad_norm": 0.20165830850601196,
"learning_rate": 0.0001462147568039977,
"loss": 0.0336,
"step": 330
},
{
"epoch": 3.7434119278779474,
"grad_norm": 0.2538021504878998,
"learning_rate": 0.00014299426011933568,
"loss": 0.0295,
"step": 340
},
{
"epoch": 3.854368932038835,
"grad_norm": 0.36229604482650757,
"learning_rate": 0.00013971809168517298,
"loss": 0.0358,
"step": 350
},
{
"epoch": 3.9653259361997226,
"grad_norm": 0.4092184603214264,
"learning_rate": 0.00013639049369634876,
"loss": 0.034,
"step": 360
},
{
"epoch": 4.066574202496533,
"grad_norm": 0.11960680782794952,
"learning_rate": 0.00013301577494201664,
"loss": 0.0233,
"step": 370
},
{
"epoch": 4.17753120665742,
"grad_norm": 0.26415354013442993,
"learning_rate": 0.00012959830522634596,
"loss": 0.02,
"step": 380
},
{
"epoch": 4.288488210818308,
"grad_norm": 0.21966516971588135,
"learning_rate": 0.00012614250971021657,
"loss": 0.0225,
"step": 390
},
{
"epoch": 4.399445214979195,
"grad_norm": 0.2905697524547577,
"learning_rate": 0.00012265286318123415,
"loss": 0.0244,
"step": 400
},
{
"epoch": 4.510402219140083,
"grad_norm": 0.24163606762886047,
"learning_rate": 0.00011913388425948584,
"loss": 0.017,
"step": 410
},
{
"epoch": 4.621359223300971,
"grad_norm": 0.40009695291519165,
"learning_rate": 0.00011559012954653865,
"loss": 0.0219,
"step": 420
},
{
"epoch": 4.732316227461858,
"grad_norm": 0.1963382512331009,
"learning_rate": 0.0001120261877252568,
"loss": 0.0179,
"step": 430
},
{
"epoch": 4.843273231622746,
"grad_norm": 0.33989155292510986,
"learning_rate": 0.00010844667361807842,
"loss": 0.0198,
"step": 440
},
{
"epoch": 4.954230235783633,
"grad_norm": 0.38484710454940796,
"learning_rate": 0.00010485622221144484,
"loss": 0.0249,
"step": 450
},
{
"epoch": 5.055478502080444,
"grad_norm": 0.18945415318012238,
"learning_rate": 0.00010125948265412033,
"loss": 0.0177,
"step": 460
},
{
"epoch": 5.166435506241331,
"grad_norm": 0.25906893610954285,
"learning_rate": 9.766111223717352e-05,
"loss": 0.0127,
"step": 470
},
{
"epoch": 5.277392510402219,
"grad_norm": 0.23804187774658203,
"learning_rate": 9.406577036341548e-05,
"loss": 0.0128,
"step": 480
},
{
"epoch": 5.388349514563107,
"grad_norm": 0.20456787943840027,
"learning_rate": 9.047811251410376e-05,
"loss": 0.0111,
"step": 490
},
{
"epoch": 5.499306518723994,
"grad_norm": 0.15757159888744354,
"learning_rate": 8.690278422072384e-05,
"loss": 0.0101,
"step": 500
},
{
"epoch": 5.610263522884882,
"grad_norm": 0.16691505908966064,
"learning_rate": 8.334441504965455e-05,
"loss": 0.0115,
"step": 510
},
{
"epoch": 5.721220527045769,
"grad_norm": 0.5055399537086487,
"learning_rate": 7.980761260750607e-05,
"loss": 0.0088,
"step": 520
},
{
"epoch": 5.832177531206657,
"grad_norm": 0.15076065063476562,
"learning_rate": 7.629695657489257e-05,
"loss": 0.0117,
"step": 530
},
{
"epoch": 5.943134535367545,
"grad_norm": 0.09655993431806564,
"learning_rate": 7.281699277636572e-05,
"loss": 0.0111,
"step": 540
},
{
"epoch": 6.044382801664355,
"grad_norm": 0.4866645336151123,
"learning_rate": 6.93722272941869e-05,
"loss": 0.0092,
"step": 550
},
{
"epoch": 6.155339805825243,
"grad_norm": 0.1816895604133606,
"learning_rate": 6.59671206335602e-05,
"loss": 0.0082,
"step": 560
},
{
"epoch": 6.26629680998613,
"grad_norm": 0.22271257638931274,
"learning_rate": 6.260608194688206e-05,
"loss": 0.0046,
"step": 570
},
{
"epoch": 6.377253814147018,
"grad_norm": 0.06787201762199402,
"learning_rate": 5.929346332448511e-05,
"loss": 0.0051,
"step": 580
},
{
"epoch": 6.4882108183079055,
"grad_norm": 0.09298055619001389,
"learning_rate": 5.6033554159270294e-05,
"loss": 0.0054,
"step": 590
},
{
"epoch": 6.599167822468793,
"grad_norm": 0.03731105476617813,
"learning_rate": 5.283057559252341e-05,
"loss": 0.0053,
"step": 600
},
{
"epoch": 6.710124826629681,
"grad_norm": 0.10652171820402145,
"learning_rate": 4.96886750481082e-05,
"loss": 0.0057,
"step": 610
},
{
"epoch": 6.821081830790568,
"grad_norm": 0.2607424259185791,
"learning_rate": 4.661192086211366e-05,
"loss": 0.0077,
"step": 620
},
{
"epoch": 6.932038834951456,
"grad_norm": 0.11328639835119247,
"learning_rate": 4.360429701490934e-05,
"loss": 0.0073,
"step": 630
},
{
"epoch": 7.033287101248266,
"grad_norm": 0.0941685363650322,
"learning_rate": 4.06696979724298e-05,
"loss": 0.0039,
"step": 640
},
{
"epoch": 7.144244105409154,
"grad_norm": 0.45776239037513733,
"learning_rate": 3.7811923643367974e-05,
"loss": 0.0032,
"step": 650
},
{
"epoch": 7.2552011095700415,
"grad_norm": 0.08863729238510132,
"learning_rate": 3.503467445880789e-05,
"loss": 0.0026,
"step": 660
},
{
"epoch": 7.366158113730929,
"grad_norm": 0.04661976918578148,
"learning_rate": 3.2341546580666796e-05,
"loss": 0.0024,
"step": 670
},
{
"epoch": 7.477115117891817,
"grad_norm": 0.08003357797861099,
"learning_rate": 2.9736027245152275e-05,
"loss": 0.0022,
"step": 680
},
{
"epoch": 7.588072122052704,
"grad_norm": 0.15967042744159698,
"learning_rate": 2.722149024726307e-05,
"loss": 0.0024,
"step": 690
},
{
"epoch": 7.699029126213592,
"grad_norm": 0.0572751984000206,
"learning_rate": 2.480119157218108e-05,
"loss": 0.003,
"step": 700
},
{
"epoch": 7.8099861303744795,
"grad_norm": 0.0780700072646141,
"learning_rate": 2.247826517921121e-05,
"loss": 0.0035,
"step": 710
},
{
"epoch": 7.920943134535367,
"grad_norm": 0.19474399089813232,
"learning_rate": 2.025571894372794e-05,
"loss": 0.0027,
"step": 720
},
{
"epoch": 8.022191400832178,
"grad_norm": 0.12848657369613647,
"learning_rate": 1.813643076238375e-05,
"loss": 0.002,
"step": 730
},
{
"epoch": 8.133148404993065,
"grad_norm": 0.05772533640265465,
"learning_rate": 1.6123144826622504e-05,
"loss": 0.0017,
"step": 740
},
{
"epoch": 8.244105409153953,
"grad_norm": 0.14121367037296295,
"learning_rate": 1.4218468069322578e-05,
"loss": 0.0013,
"step": 750
},
{
"epoch": 8.35506241331484,
"grad_norm": 0.14342299103736877,
"learning_rate": 1.2424866789171729e-05,
"loss": 0.0016,
"step": 760
},
{
"epoch": 8.466019417475728,
"grad_norm": 0.03438349440693855,
"learning_rate": 1.0744663457143878e-05,
"loss": 0.0011,
"step": 770
},
{
"epoch": 8.576976421636616,
"grad_norm": 0.0756613090634346,
"learning_rate": 9.180033709213454e-06,
"loss": 0.0017,
"step": 780
},
{
"epoch": 8.687933425797503,
"grad_norm": 0.0464102178812027,
"learning_rate": 7.733003529201278e-06,
"loss": 0.0014,
"step": 790
},
{
"epoch": 8.79889042995839,
"grad_norm": 0.12452979385852814,
"learning_rate": 6.405446625399481e-06,
"loss": 0.0015,
"step": 800
},
{
"epoch": 8.909847434119278,
"grad_norm": 0.08071909099817276,
"learning_rate": 5.199082004372957e-06,
"loss": 0.0014,
"step": 810
},
{
"epoch": 9.011095700416089,
"grad_norm": 0.06948132812976837,
"learning_rate": 4.115471745078314e-06,
"loss": 0.0012,
"step": 820
},
{
"epoch": 9.122052704576976,
"grad_norm": 0.07605510950088501,
"learning_rate": 3.1560189761830728e-06,
"loss": 0.0009,
"step": 830
},
{
"epoch": 9.233009708737864,
"grad_norm": 0.0312280785292387,
"learning_rate": 2.3219660592038285e-06,
"loss": 0.0012,
"step": 840
},
{
"epoch": 9.343966712898752,
"grad_norm": 0.02329327166080475,
"learning_rate": 1.6143929798162704e-06,
"loss": 0.001,
"step": 850
},
{
"epoch": 9.45492371705964,
"grad_norm": 0.08054498583078384,
"learning_rate": 1.034215949419748e-06,
"loss": 0.0012,
"step": 860
},
{
"epoch": 9.565880721220527,
"grad_norm": 0.09850303828716278,
"learning_rate": 5.821862187675775e-07,
"loss": 0.0011,
"step": 870
},
{
"epoch": 9.676837725381414,
"grad_norm": 0.08373916149139404,
"learning_rate": 2.588891051988895e-07,
"loss": 0.0019,
"step": 880
},
{
"epoch": 9.787794729542302,
"grad_norm": 0.017217393964529037,
"learning_rate": 6.474323473194543e-08,
"loss": 0.0009,
"step": 890
},
{
"epoch": 9.89875173370319,
"grad_norm": 0.04848321154713631,
"learning_rate": 0.0,
"loss": 0.0009,
"step": 900
}
],
"logging_steps": 10,
"max_steps": 900,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.301284175906406e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}