ces-phase3b-lora / checkpoint-2000 /trainer_state.json
baglecake's picture
Upload folder using huggingface_hub
a18502c verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.2125069175428886,
"eval_steps": 500,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05534034311012728,
"grad_norm": 0.7568074464797974,
"learning_rate": 9.8e-05,
"loss": 1.0759,
"step": 50
},
{
"epoch": 0.11068068622025456,
"grad_norm": 0.14674387872219086,
"learning_rate": 0.00019800000000000002,
"loss": 0.1883,
"step": 100
},
{
"epoch": 0.16602102933038185,
"grad_norm": 0.13491740822792053,
"learning_rate": 0.0001998263839556516,
"loss": 0.1823,
"step": 150
},
{
"epoch": 0.22136137244050913,
"grad_norm": 0.11964733898639679,
"learning_rate": 0.00019929192281085555,
"loss": 0.1812,
"step": 200
},
{
"epoch": 0.27670171555063644,
"grad_norm": 0.14810685813426971,
"learning_rate": 0.0001983984765530473,
"loss": 0.181,
"step": 250
},
{
"epoch": 0.3320420586607637,
"grad_norm": 0.11917376518249512,
"learning_rate": 0.0001971492753936756,
"loss": 0.1801,
"step": 300
},
{
"epoch": 0.387382401770891,
"grad_norm": 0.11992548406124115,
"learning_rate": 0.0001955488357587162,
"loss": 0.1777,
"step": 350
},
{
"epoch": 0.44272274488101826,
"grad_norm": 0.09655219316482544,
"learning_rate": 0.00019360294395975392,
"loss": 0.1778,
"step": 400
},
{
"epoch": 0.49806308799114557,
"grad_norm": 0.08890487998723984,
"learning_rate": 0.00019131863527385433,
"loss": 0.1776,
"step": 450
},
{
"epoch": 0.5534034311012729,
"grad_norm": 0.09338568150997162,
"learning_rate": 0.0001887041685078625,
"loss": 0.1756,
"step": 500
},
{
"epoch": 0.6087437742114001,
"grad_norm": 0.10447146743535995,
"learning_rate": 0.0001857689961390886,
"loss": 0.1769,
"step": 550
},
{
"epoch": 0.6640841173215274,
"grad_norm": 0.08940507471561432,
"learning_rate": 0.00018252373014033646,
"loss": 0.1767,
"step": 600
},
{
"epoch": 0.7194244604316546,
"grad_norm": 0.1015540286898613,
"learning_rate": 0.0001789801036128327,
"loss": 0.1749,
"step": 650
},
{
"epoch": 0.774764803541782,
"grad_norm": 0.0866508036851883,
"learning_rate": 0.0001751509283657702,
"loss": 0.1765,
"step": 700
},
{
"epoch": 0.8301051466519093,
"grad_norm": 0.08853679150342941,
"learning_rate": 0.00017105004859583578,
"loss": 0.1757,
"step": 750
},
{
"epoch": 0.8854454897620365,
"grad_norm": 0.09371698647737503,
"learning_rate": 0.00016669229083419114,
"loss": 0.1766,
"step": 800
},
{
"epoch": 0.9407858328721638,
"grad_norm": 0.8600781559944153,
"learning_rate": 0.00016209341034187125,
"loss": 0.342,
"step": 850
},
{
"epoch": 0.9961261759822911,
"grad_norm": 0.0968112051486969,
"learning_rate": 0.00015727003414740492,
"loss": 0.1816,
"step": 900
},
{
"epoch": 1.0509131156613172,
"grad_norm": 0.092947818338871,
"learning_rate": 0.00015223960093260294,
"loss": 0.1753,
"step": 950
},
{
"epoch": 1.1062534587714443,
"grad_norm": 0.07544200122356415,
"learning_rate": 0.00014702029798385264,
"loss": 0.1747,
"step": 1000
},
{
"epoch": 1.1615938018815717,
"grad_norm": 0.06907663494348526,
"learning_rate": 0.00014163099543686964,
"loss": 0.1741,
"step": 1050
},
{
"epoch": 1.2169341449916988,
"grad_norm": 0.07450341433286667,
"learning_rate": 0.00013609117805264063,
"loss": 0.1754,
"step": 1100
},
{
"epoch": 1.2722744881018262,
"grad_norm": 0.09569600224494934,
"learning_rate": 0.0001304208747712189,
"loss": 0.1732,
"step": 1150
},
{
"epoch": 1.3276148312119536,
"grad_norm": 0.08210264891386032,
"learning_rate": 0.00012464058629806633,
"loss": 0.1716,
"step": 1200
},
{
"epoch": 1.3829551743220807,
"grad_norm": 0.06857864558696747,
"learning_rate": 0.00011877121098475106,
"loss": 0.1728,
"step": 1250
},
{
"epoch": 1.438295517432208,
"grad_norm": 0.11595187336206436,
"learning_rate": 0.00011283396927197472,
"loss": 0.174,
"step": 1300
},
{
"epoch": 1.4936358605423354,
"grad_norm": 0.07529956847429276,
"learning_rate": 0.00010685032696810226,
"loss": 0.1733,
"step": 1350
},
{
"epoch": 1.5489762036524626,
"grad_norm": 0.0644264817237854,
"learning_rate": 0.00010084191764057676,
"loss": 0.1738,
"step": 1400
},
{
"epoch": 1.60431654676259,
"grad_norm": 0.060298092663288116,
"learning_rate": 9.483046440080949e-05,
"loss": 0.1717,
"step": 1450
},
{
"epoch": 1.6596568898727173,
"grad_norm": 0.059512991458177567,
"learning_rate": 8.883770136532834e-05,
"loss": 0.1735,
"step": 1500
},
{
"epoch": 1.7149972329828445,
"grad_norm": 0.0744907408952713,
"learning_rate": 8.288529507713752e-05,
"loss": 0.1722,
"step": 1550
},
{
"epoch": 1.7703375760929718,
"grad_norm": 0.05729057267308235,
"learning_rate": 7.699476617138598e-05,
"loss": 0.1728,
"step": 1600
},
{
"epoch": 1.8256779192030992,
"grad_norm": 0.06146302446722984,
"learning_rate": 7.118741156855904e-05,
"loss": 0.1714,
"step": 1650
},
{
"epoch": 1.8810182623132263,
"grad_norm": 0.08402097970247269,
"learning_rate": 6.548422747649902e-05,
"loss": 0.1711,
"step": 1700
},
{
"epoch": 1.9363586054233535,
"grad_norm": 0.06482277065515518,
"learning_rate": 5.990583347963793e-05,
"loss": 0.1714,
"step": 1750
},
{
"epoch": 1.991698948533481,
"grad_norm": 0.06089329719543457,
"learning_rate": 5.44723979898939e-05,
"loss": 0.1711,
"step": 1800
},
{
"epoch": 2.0464858882125068,
"grad_norm": 0.053777534514665604,
"learning_rate": 4.9203565328759604e-05,
"loss": 0.1708,
"step": 1850
},
{
"epoch": 2.1018262313226344,
"grad_norm": 0.052074234932661057,
"learning_rate": 4.411838470421454e-05,
"loss": 0.169,
"step": 1900
},
{
"epoch": 2.1571665744327615,
"grad_norm": 0.04878537356853485,
"learning_rate": 3.923524133924069e-05,
"loss": 0.1705,
"step": 1950
},
{
"epoch": 2.2125069175428886,
"grad_norm": 0.06310058385133743,
"learning_rate": 3.4571790000943973e-05,
"loss": 0.1706,
"step": 2000
}
],
"logging_steps": 50,
"max_steps": 2712,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.341042688193413e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}