{
"best_global_step": 2000,
"best_metric": 0.924,
"best_model_checkpoint": "./qlora-bert-sentiment/checkpoint-2000",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"grad_norm": 2.0022401809692383,
"learning_rate": 0.0001225,
"loss": 0.703,
"step": 50
},
{
"epoch": 0.1,
"grad_norm": 1.6413620710372925,
"learning_rate": 0.0002475,
"loss": 0.6735,
"step": 100
},
{
"epoch": 0.15,
"grad_norm": 6.632427215576172,
"learning_rate": 0.0003725,
"loss": 0.5469,
"step": 150
},
{
"epoch": 0.2,
"grad_norm": 1.8693019151687622,
"learning_rate": 0.0004975,
"loss": 0.6154,
"step": 200
},
{
"epoch": 0.25,
"grad_norm": 7.796047687530518,
"learning_rate": 0.0004990863240477266,
"loss": 0.3135,
"step": 250
},
{
"epoch": 0.3,
"grad_norm": 0.8750956654548645,
"learning_rate": 0.0004962773315386935,
"loss": 0.3788,
"step": 300
},
{
"epoch": 0.35,
"grad_norm": 2.438046455383301,
"learning_rate": 0.0004915940198303324,
"loss": 0.3203,
"step": 350
},
{
"epoch": 0.4,
"grad_norm": 0.4791859984397888,
"learning_rate": 0.00048507203175260474,
"loss": 0.2855,
"step": 400
},
{
"epoch": 0.45,
"grad_norm": 6.871181964874268,
"learning_rate": 0.0004767610035728662,
"loss": 0.3167,
"step": 450
},
{
"epoch": 0.5,
"grad_norm": 0.08755309879779816,
"learning_rate": 0.0004667241872339007,
"loss": 0.2732,
"step": 500
},
{
"epoch": 0.55,
"grad_norm": 0.43568190932273865,
"learning_rate": 0.00045503796896844307,
"loss": 0.3608,
"step": 550
},
{
"epoch": 0.6,
"grad_norm": 3.768226385116577,
"learning_rate": 0.00044179128795382493,
"loss": 0.2945,
"step": 600
},
{
"epoch": 0.65,
"grad_norm": 4.359898567199707,
"learning_rate": 0.00042708495943113224,
"loss": 0.2697,
"step": 650
},
{
"epoch": 0.7,
"grad_norm": 1.9498214721679688,
"learning_rate": 0.00041103090744034666,
"loss": 0.3245,
"step": 700
},
{
"epoch": 0.75,
"grad_norm": 0.7939767241477966,
"learning_rate": 0.0003937513130108197,
"loss": 0.2775,
"step": 750
},
{
"epoch": 0.8,
"grad_norm": 7.53210973739624,
"learning_rate": 0.00037537768428986434,
"loss": 0.333,
"step": 800
},
{
"epoch": 0.85,
"grad_norm": 1.3197941780090332,
"learning_rate": 0.0003560498556863475,
"loss": 0.2535,
"step": 850
},
{
"epoch": 0.9,
"grad_norm": 4.376391410827637,
"learning_rate": 0.0003359149236464041,
"loss": 0.2585,
"step": 900
},
{
"epoch": 0.95,
"grad_norm": 4.018824577331543,
"learning_rate": 0.00031512612716066215,
"loss": 0.2368,
"step": 950
},
{
"epoch": 1.0,
"grad_norm": 1.9961744546890259,
"learning_rate": 0.00029384168152299676,
"loss": 0.2533,
"step": 1000
},
{
"epoch": 1.0,
"eval_accuracy": 0.9185,
"eval_loss": 0.27835872769355774,
"eval_runtime": 47.563,
"eval_samples_per_second": 42.049,
"eval_steps_per_second": 21.025,
"step": 1000
},
{
"epoch": 1.05,
"grad_norm": 3.986208915710449,
"learning_rate": 0.00027222357421661044,
"loss": 0.2054,
"step": 1050
},
{
"epoch": 1.1,
"grad_norm": 4.131945610046387,
"learning_rate": 0.0002504363320914746,
"loss": 0.1693,
"step": 1100
},
{
"epoch": 1.15,
"grad_norm": 0.10492417216300964,
"learning_rate": 0.00022864576921565816,
"loss": 0.2037,
"step": 1150
},
{
"epoch": 1.2,
"grad_norm": 0.286766916513443,
"learning_rate": 0.00020701772493014758,
"loss": 0.2305,
"step": 1200
},
{
"epoch": 1.25,
"grad_norm": 4.297652721405029,
"learning_rate": 0.000185716801711326,
"loss": 0.1592,
"step": 1250
},
{
"epoch": 1.3,
"grad_norm": 1.9070364236831665,
"learning_rate": 0.00016490511244673752,
"loss": 0.1998,
"step": 1300
},
{
"epoch": 1.35,
"grad_norm": 0.04250560328364372,
"learning_rate": 0.00014474104665812727,
"loss": 0.1699,
"step": 1350
},
{
"epoch": 1.4,
"grad_norm": 0.16112375259399414,
"learning_rate": 0.00012537806506154246,
"loss": 0.2727,
"step": 1400
},
{
"epoch": 1.45,
"grad_norm": 6.064806938171387,
"learning_rate": 0.000106963531638621,
"loss": 0.2407,
"step": 1450
},
{
"epoch": 1.5,
"grad_norm": 1.4757925271987915,
"learning_rate": 8.963759210771053e-05,
"loss": 0.1953,
"step": 1500
},
{
"epoch": 1.55,
"grad_norm": 0.12172006070613861,
"learning_rate": 7.353210733032975e-05,
"loss": 0.1397,
"step": 1550
},
{
"epoch": 1.6,
"grad_norm": 1.6855131387710571,
"learning_rate": 5.8769649770392066e-05,
"loss": 0.2656,
"step": 1600
},
{
"epoch": 1.65,
"grad_norm": 9.262821197509766,
"learning_rate": 4.54625706437441e-05,
"loss": 0.1447,
"step": 1650
},
{
"epoch": 1.7,
"grad_norm": 0.10529584437608719,
"learning_rate": 3.3712144857573926e-05,
"loss": 0.1424,
"step": 1700
},
{
"epoch": 1.75,
"grad_norm": 8.399650573730469,
"learning_rate": 2.360780024721515e-05,
"loss": 0.2099,
"step": 1750
},
{
"epoch": 1.8,
"grad_norm": 0.10371621698141098,
"learning_rate": 1.5226436976322727e-05,
"loss": 0.1819,
"step": 1800
},
{
"epoch": 1.85,
"grad_norm": 0.38396862149238586,
"learning_rate": 8.631842280193758e-06,
"loss": 0.2066,
"step": 1850
},
{
"epoch": 1.9,
"grad_norm": 0.6245690584182739,
"learning_rate": 3.874205006390852e-06,
"loss": 0.1865,
"step": 1900
},
{
"epoch": 1.95,
"grad_norm": 7.9351487159729,
"learning_rate": 9.897336473076167e-07,
"loss": 0.2503,
"step": 1950
},
{
"epoch": 2.0,
"grad_norm": 4.218188285827637,
"learning_rate": 3.807716780768189e-10,
"loss": 0.165,
"step": 2000
},
{
"epoch": 2.0,
"eval_accuracy": 0.924,
"eval_loss": 0.268412709236145,
"eval_runtime": 47.5747,
"eval_samples_per_second": 42.039,
"eval_steps_per_second": 21.02,
"step": 2000
}
],
"logging_steps": 50,
"max_steps": 2000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4231595753472000.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}