internvl-HN / trainer_state.json
geminimars's picture
Upload 13 files
b5dccc3 verified
{
"best_metric": 1.84548545,
"best_model_checkpoint": "/public1/home/stu52275901020/swift/model_output/\u9ad8\u795e\u7ecf\u8d28/internvl2-8b/v36-20241204-000354/checkpoint-150",
"epoch": 1.702127659574468,
"eval_steps": 50,
"global_step": 150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"acc": 0.4818837,
"epoch": 0.011347517730496455,
"grad_norm": 1.6654787063598633,
"learning_rate": 7.142857142857143e-06,
"loss": 2.21617293,
"memory(GiB)": 21.82,
"step": 1,
"train_speed(iter/s)": 0.047704
},
{
"acc": 0.50631422,
"epoch": 0.05673758865248227,
"grad_norm": 1.5600533485412598,
"learning_rate": 3.571428571428572e-05,
"loss": 2.20940375,
"memory(GiB)": 23.14,
"step": 5,
"train_speed(iter/s)": 0.099488
},
{
"acc": 0.51391821,
"epoch": 0.11347517730496454,
"grad_norm": 1.7131439447402954,
"learning_rate": 7.142857142857143e-05,
"loss": 2.04298916,
"memory(GiB)": 23.14,
"step": 10,
"train_speed(iter/s)": 0.114765
},
{
"acc": 0.55212674,
"epoch": 0.1702127659574468,
"grad_norm": 1.686623215675354,
"learning_rate": 9.999605221019081e-05,
"loss": 1.88184052,
"memory(GiB)": 24.5,
"step": 15,
"train_speed(iter/s)": 0.12107
},
{
"acc": 0.53304815,
"epoch": 0.22695035460992907,
"grad_norm": 1.0942801237106323,
"learning_rate": 9.98579450130307e-05,
"loss": 1.94336758,
"memory(GiB)": 24.5,
"step": 20,
"train_speed(iter/s)": 0.124439
},
{
"acc": 0.56079154,
"epoch": 0.28368794326241137,
"grad_norm": 0.9606024622917175,
"learning_rate": 9.952307128483256e-05,
"loss": 1.77457695,
"memory(GiB)": 24.5,
"step": 25,
"train_speed(iter/s)": 0.126437
},
{
"acc": 0.55825844,
"epoch": 0.3404255319148936,
"grad_norm": 0.922258198261261,
"learning_rate": 9.899275261921234e-05,
"loss": 1.84706001,
"memory(GiB)": 24.5,
"step": 30,
"train_speed(iter/s)": 0.127904
},
{
"acc": 0.56207647,
"epoch": 0.3971631205673759,
"grad_norm": 1.0378409624099731,
"learning_rate": 9.82690819416637e-05,
"loss": 1.753508,
"memory(GiB)": 24.5,
"step": 35,
"train_speed(iter/s)": 0.128886
},
{
"acc": 0.57356367,
"epoch": 0.45390070921985815,
"grad_norm": 1.1508853435516357,
"learning_rate": 9.735491524973722e-05,
"loss": 1.75303555,
"memory(GiB)": 24.5,
"step": 40,
"train_speed(iter/s)": 0.129863
},
{
"acc": 0.55143838,
"epoch": 0.5106382978723404,
"grad_norm": 1.2086931467056274,
"learning_rate": 9.62538603417229e-05,
"loss": 1.85968819,
"memory(GiB)": 25.91,
"step": 45,
"train_speed(iter/s)": 0.130489
},
{
"acc": 0.55636163,
"epoch": 0.5673758865248227,
"grad_norm": 1.0430564880371094,
"learning_rate": 9.497026257831855e-05,
"loss": 1.7241251,
"memory(GiB)": 25.91,
"step": 50,
"train_speed(iter/s)": 0.131006
},
{
"epoch": 0.5673758865248227,
"eval_acc": 0.5656192236598891,
"eval_loss": 1.916869044303894,
"eval_runtime": 2.4464,
"eval_samples_per_second": 5.723,
"eval_steps_per_second": 5.723,
"step": 50
},
{
"acc": 0.54153261,
"epoch": 0.624113475177305,
"grad_norm": 1.2167576551437378,
"learning_rate": 9.35091877334763e-05,
"loss": 1.82617283,
"memory(GiB)": 26.3,
"step": 55,
"train_speed(iter/s)": 0.129776
},
{
"acc": 0.56068554,
"epoch": 0.6808510638297872,
"grad_norm": 1.3103394508361816,
"learning_rate": 9.18764020021071e-05,
"loss": 1.67511196,
"memory(GiB)": 26.3,
"step": 60,
"train_speed(iter/s)": 0.130279
},
{
"acc": 0.58310304,
"epoch": 0.7375886524822695,
"grad_norm": 1.0143637657165527,
"learning_rate": 9.007834924354383e-05,
"loss": 1.67270927,
"memory(GiB)": 26.3,
"step": 65,
"train_speed(iter/s)": 0.130674
},
{
"acc": 0.57467747,
"epoch": 0.7943262411347518,
"grad_norm": 1.3850369453430176,
"learning_rate": 8.81221255505724e-05,
"loss": 1.70278339,
"memory(GiB)": 26.3,
"step": 70,
"train_speed(iter/s)": 0.131012
},
{
"acc": 0.5916081,
"epoch": 0.851063829787234,
"grad_norm": 1.0732088088989258,
"learning_rate": 8.601545124439535e-05,
"loss": 1.63483982,
"memory(GiB)": 26.3,
"step": 75,
"train_speed(iter/s)": 0.131314
},
{
"acc": 0.56617246,
"epoch": 0.9078014184397163,
"grad_norm": 1.084026575088501,
"learning_rate": 8.376664040605122e-05,
"loss": 1.73948345,
"memory(GiB)": 26.3,
"step": 80,
"train_speed(iter/s)": 0.131575
},
{
"acc": 0.54482946,
"epoch": 0.9645390070921985,
"grad_norm": 1.0596380233764648,
"learning_rate": 8.138456806453503e-05,
"loss": 1.84302387,
"memory(GiB)": 26.3,
"step": 85,
"train_speed(iter/s)": 0.131746
},
{
"acc": 0.60235672,
"epoch": 1.0212765957446808,
"grad_norm": 0.9688817858695984,
"learning_rate": 7.887863517111338e-05,
"loss": 1.53876371,
"memory(GiB)": 26.3,
"step": 90,
"train_speed(iter/s)": 0.131937
},
{
"acc": 0.61323328,
"epoch": 1.0780141843971631,
"grad_norm": 1.1411057710647583,
"learning_rate": 7.62587314980648e-05,
"loss": 1.45239391,
"memory(GiB)": 26.3,
"step": 95,
"train_speed(iter/s)": 0.132127
},
{
"acc": 0.59773045,
"epoch": 1.1347517730496455,
"grad_norm": 1.1101690530776978,
"learning_rate": 7.353519660826665e-05,
"loss": 1.54209366,
"memory(GiB)": 26.3,
"step": 100,
"train_speed(iter/s)": 0.132289
},
{
"epoch": 1.1347517730496455,
"eval_acc": 0.5360443622920518,
"eval_loss": 1.8874558210372925,
"eval_runtime": 2.3721,
"eval_samples_per_second": 5.902,
"eval_steps_per_second": 5.902,
"step": 100
},
{
"acc": 0.61074128,
"epoch": 1.1914893617021276,
"grad_norm": 1.1275923252105713,
"learning_rate": 7.071877904966423e-05,
"loss": 1.45454111,
"memory(GiB)": 26.3,
"step": 105,
"train_speed(iter/s)": 0.131567
},
{
"acc": 0.60273228,
"epoch": 1.24822695035461,
"grad_norm": 1.3083913326263428,
"learning_rate": 6.782059393566253e-05,
"loss": 1.48250647,
"memory(GiB)": 26.3,
"step": 110,
"train_speed(iter/s)": 0.13177
},
{
"acc": 0.62370677,
"epoch": 1.3049645390070923,
"grad_norm": 1.0839005708694458,
"learning_rate": 6.485207907885175e-05,
"loss": 1.46015568,
"memory(GiB)": 26.3,
"step": 115,
"train_speed(iter/s)": 0.131889
},
{
"acc": 0.61979499,
"epoch": 1.3617021276595744,
"grad_norm": 1.2509405612945557,
"learning_rate": 6.182494985118624e-05,
"loss": 1.45815115,
"memory(GiB)": 26.3,
"step": 120,
"train_speed(iter/s)": 0.132091
},
{
"acc": 0.61155128,
"epoch": 1.4184397163120568,
"grad_norm": 1.4081703424453735,
"learning_rate": 5.875115294876381e-05,
"loss": 1.44713774,
"memory(GiB)": 26.3,
"step": 125,
"train_speed(iter/s)": 0.132225
},
{
"acc": 0.61849551,
"epoch": 1.475177304964539,
"grad_norm": 1.3654893636703491,
"learning_rate": 5.564281924367408e-05,
"loss": 1.39691505,
"memory(GiB)": 26.3,
"step": 130,
"train_speed(iter/s)": 0.132372
},
{
"acc": 0.62567844,
"epoch": 1.5319148936170213,
"grad_norm": 1.5169520378112793,
"learning_rate": 5.2512215908988484e-05,
"loss": 1.41729832,
"memory(GiB)": 26.3,
"step": 135,
"train_speed(iter/s)": 0.132474
},
{
"acc": 0.63134012,
"epoch": 1.5886524822695036,
"grad_norm": 1.4038567543029785,
"learning_rate": 4.9371698005832365e-05,
"loss": 1.37339115,
"memory(GiB)": 26.3,
"step": 140,
"train_speed(iter/s)": 0.132568
},
{
"acc": 0.61558661,
"epoch": 1.645390070921986,
"grad_norm": 1.513599157333374,
"learning_rate": 4.623365972360337e-05,
"loss": 1.44473677,
"memory(GiB)": 26.3,
"step": 145,
"train_speed(iter/s)": 0.132637
},
{
"acc": 0.62634563,
"epoch": 1.702127659574468,
"grad_norm": 1.3617616891860962,
"learning_rate": 4.31104854657681e-05,
"loss": 1.38092842,
"memory(GiB)": 26.3,
"step": 150,
"train_speed(iter/s)": 0.13276
},
{
"epoch": 1.702127659574468,
"eval_acc": 0.5489833641404805,
"eval_loss": 1.8454854488372803,
"eval_runtime": 2.5064,
"eval_samples_per_second": 5.586,
"eval_steps_per_second": 5.586,
"step": 150
}
],
"logging_steps": 5,
"max_steps": 264,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}