dragonclaw_model / checkpoint-433 /trainer_state.json
akacaptain's picture
Upload folder using huggingface_hub
956138b verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 433,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02313475997686524,
"grad_norm": 3.5443220138549805,
"learning_rate": 0.00013846153846153847,
"loss": 2.4906261444091795,
"step": 10
},
{
"epoch": 0.04626951995373048,
"grad_norm": 0.4789237976074219,
"learning_rate": 0.00019714285714285716,
"loss": 0.13569670915603638,
"step": 20
},
{
"epoch": 0.06940427993059572,
"grad_norm": 0.0071147712878882885,
"learning_rate": 0.0001923809523809524,
"loss": 0.023090672492980958,
"step": 30
},
{
"epoch": 0.09253903990746096,
"grad_norm": 0.031249770894646645,
"learning_rate": 0.00018761904761904763,
"loss": 0.07923950552940369,
"step": 40
},
{
"epoch": 0.1156737998843262,
"grad_norm": 0.23233748972415924,
"learning_rate": 0.00018285714285714286,
"loss": 0.01179821714758873,
"step": 50
},
{
"epoch": 0.13880855986119145,
"grad_norm": 1.0764169692993164,
"learning_rate": 0.0001780952380952381,
"loss": 0.04595586061477661,
"step": 60
},
{
"epoch": 0.16194331983805668,
"grad_norm": 0.4203377366065979,
"learning_rate": 0.00017333333333333334,
"loss": 0.020573070645332335,
"step": 70
},
{
"epoch": 0.18507807981492191,
"grad_norm": 0.06960943341255188,
"learning_rate": 0.00016857142857142857,
"loss": 0.02428939938545227,
"step": 80
},
{
"epoch": 0.20821283979178715,
"grad_norm": 0.761617124080658,
"learning_rate": 0.0001638095238095238,
"loss": 0.03832740783691406,
"step": 90
},
{
"epoch": 0.2313475997686524,
"grad_norm": 0.024328596889972687,
"learning_rate": 0.00015904761904761904,
"loss": 0.010582192242145539,
"step": 100
},
{
"epoch": 0.25448235974551764,
"grad_norm": 0.03712387755513191,
"learning_rate": 0.0001542857142857143,
"loss": 0.014394421875476838,
"step": 110
},
{
"epoch": 0.2776171197223829,
"grad_norm": 0.35509419441223145,
"learning_rate": 0.00014952380952380954,
"loss": 0.03347426652908325,
"step": 120
},
{
"epoch": 0.3007518796992481,
"grad_norm": 0.11692740023136139,
"learning_rate": 0.00014476190476190475,
"loss": 0.06869672536849976,
"step": 130
},
{
"epoch": 0.32388663967611336,
"grad_norm": 0.025060011073946953,
"learning_rate": 0.00014,
"loss": 0.002869569510221481,
"step": 140
},
{
"epoch": 0.3470213996529786,
"grad_norm": 0.04316815361380577,
"learning_rate": 0.00013523809523809525,
"loss": 0.017471878230571745,
"step": 150
},
{
"epoch": 0.37015615962984383,
"grad_norm": 0.24662676453590393,
"learning_rate": 0.0001304761904761905,
"loss": 0.04017368853092194,
"step": 160
},
{
"epoch": 0.3932909196067091,
"grad_norm": 0.013658199459314346,
"learning_rate": 0.00012571428571428572,
"loss": 0.011992159485816955,
"step": 170
},
{
"epoch": 0.4164256795835743,
"grad_norm": 0.011875756084918976,
"learning_rate": 0.00012095238095238095,
"loss": 0.003486642986536026,
"step": 180
},
{
"epoch": 0.43956043956043955,
"grad_norm": 0.008698398247361183,
"learning_rate": 0.00011619047619047621,
"loss": 0.009918726235628127,
"step": 190
},
{
"epoch": 0.4626951995373048,
"grad_norm": 0.012751366011798382,
"learning_rate": 0.00011142857142857144,
"loss": 0.0135767862200737,
"step": 200
},
{
"epoch": 0.48582995951417,
"grad_norm": 0.013968385756015778,
"learning_rate": 0.00010666666666666667,
"loss": 0.0009812915697693825,
"step": 210
},
{
"epoch": 0.5089647194910353,
"grad_norm": 0.38197270035743713,
"learning_rate": 0.0001019047619047619,
"loss": 0.010036008059978485,
"step": 220
},
{
"epoch": 0.5320994794679005,
"grad_norm": 0.008353643119335175,
"learning_rate": 9.714285714285715e-05,
"loss": 0.0008777316659688949,
"step": 230
},
{
"epoch": 0.5552342394447658,
"grad_norm": 0.03482387587428093,
"learning_rate": 9.238095238095239e-05,
"loss": 0.06720049977302552,
"step": 240
},
{
"epoch": 0.578368999421631,
"grad_norm": 0.017207808792591095,
"learning_rate": 8.761904761904762e-05,
"loss": 0.009993697702884673,
"step": 250
},
{
"epoch": 0.6015037593984962,
"grad_norm": 0.04262904077768326,
"learning_rate": 8.285714285714287e-05,
"loss": 0.005409357324242592,
"step": 260
},
{
"epoch": 0.6246385193753615,
"grad_norm": 0.010086641646921635,
"learning_rate": 7.80952380952381e-05,
"loss": 0.0022824501618742945,
"step": 270
},
{
"epoch": 0.6477732793522267,
"grad_norm": 0.019428474828600883,
"learning_rate": 7.333333333333333e-05,
"loss": 0.009420862048864364,
"step": 280
},
{
"epoch": 0.6709080393290919,
"grad_norm": 0.010999761521816254,
"learning_rate": 6.857142857142858e-05,
"loss": 0.017436870932579042,
"step": 290
},
{
"epoch": 0.6940427993059572,
"grad_norm": 0.03521187976002693,
"learning_rate": 6.38095238095238e-05,
"loss": 0.007618572562932968,
"step": 300
},
{
"epoch": 0.7171775592828225,
"grad_norm": 0.02904939278960228,
"learning_rate": 5.904761904761905e-05,
"loss": 0.009538635611534119,
"step": 310
},
{
"epoch": 0.7403123192596877,
"grad_norm": 0.28503215312957764,
"learning_rate": 5.428571428571428e-05,
"loss": 0.003535139188170433,
"step": 320
},
{
"epoch": 0.763447079236553,
"grad_norm": 0.1933276355266571,
"learning_rate": 4.9523809523809525e-05,
"loss": 0.00619364008307457,
"step": 330
},
{
"epoch": 0.7865818392134182,
"grad_norm": 0.007664988283067942,
"learning_rate": 4.476190476190477e-05,
"loss": 0.0007373106665909291,
"step": 340
},
{
"epoch": 0.8097165991902834,
"grad_norm": 0.5651599764823914,
"learning_rate": 4e-05,
"loss": 0.07755469083786011,
"step": 350
},
{
"epoch": 0.8328513591671486,
"grad_norm": 0.05437934026122093,
"learning_rate": 3.523809523809524e-05,
"loss": 0.015709011256694792,
"step": 360
},
{
"epoch": 0.8559861191440139,
"grad_norm": 0.061594076454639435,
"learning_rate": 3.0476190476190482e-05,
"loss": 0.04147002398967743,
"step": 370
},
{
"epoch": 0.8791208791208791,
"grad_norm": 0.03086649812757969,
"learning_rate": 2.5714285714285714e-05,
"loss": 0.0031579844653606415,
"step": 380
},
{
"epoch": 0.9022556390977443,
"grad_norm": 0.3715185523033142,
"learning_rate": 2.0952380952380954e-05,
"loss": 0.01193058043718338,
"step": 390
},
{
"epoch": 0.9253903990746096,
"grad_norm": 0.017640365287661552,
"learning_rate": 1.6190476190476193e-05,
"loss": 0.00927691012620926,
"step": 400
},
{
"epoch": 0.9485251590514748,
"grad_norm": 0.02302401326596737,
"learning_rate": 1.1428571428571429e-05,
"loss": 0.07495037913322448,
"step": 410
},
{
"epoch": 0.97165991902834,
"grad_norm": 0.013834159821271896,
"learning_rate": 6.666666666666667e-06,
"loss": 0.015109787881374358,
"step": 420
},
{
"epoch": 0.9947946790052054,
"grad_norm": 0.023592131212353706,
"learning_rate": 1.9047619047619051e-06,
"loss": 0.012308744341135025,
"step": 430
}
],
"logging_steps": 10,
"max_steps": 433,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3979819485155328.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}