{ "best_metric": 1.84548545, "best_model_checkpoint": "/public1/home/stu52275901020/swift/model_output/\u9ad8\u795e\u7ecf\u8d28/internvl2-8b/v36-20241204-000354/checkpoint-150", "epoch": 1.702127659574468, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "acc": 0.4818837, "epoch": 0.011347517730496455, "grad_norm": 1.6654787063598633, "learning_rate": 7.142857142857143e-06, "loss": 2.21617293, "memory(GiB)": 21.82, "step": 1, "train_speed(iter/s)": 0.047704 }, { "acc": 0.50631422, "epoch": 0.05673758865248227, "grad_norm": 1.5600533485412598, "learning_rate": 3.571428571428572e-05, "loss": 2.20940375, "memory(GiB)": 23.14, "step": 5, "train_speed(iter/s)": 0.099488 }, { "acc": 0.51391821, "epoch": 0.11347517730496454, "grad_norm": 1.7131439447402954, "learning_rate": 7.142857142857143e-05, "loss": 2.04298916, "memory(GiB)": 23.14, "step": 10, "train_speed(iter/s)": 0.114765 }, { "acc": 0.55212674, "epoch": 0.1702127659574468, "grad_norm": 1.686623215675354, "learning_rate": 9.999605221019081e-05, "loss": 1.88184052, "memory(GiB)": 24.5, "step": 15, "train_speed(iter/s)": 0.12107 }, { "acc": 0.53304815, "epoch": 0.22695035460992907, "grad_norm": 1.0942801237106323, "learning_rate": 9.98579450130307e-05, "loss": 1.94336758, "memory(GiB)": 24.5, "step": 20, "train_speed(iter/s)": 0.124439 }, { "acc": 0.56079154, "epoch": 0.28368794326241137, "grad_norm": 0.9606024622917175, "learning_rate": 9.952307128483256e-05, "loss": 1.77457695, "memory(GiB)": 24.5, "step": 25, "train_speed(iter/s)": 0.126437 }, { "acc": 0.55825844, "epoch": 0.3404255319148936, "grad_norm": 0.922258198261261, "learning_rate": 9.899275261921234e-05, "loss": 1.84706001, "memory(GiB)": 24.5, "step": 30, "train_speed(iter/s)": 0.127904 }, { "acc": 0.56207647, "epoch": 0.3971631205673759, "grad_norm": 1.0378409624099731, "learning_rate": 9.82690819416637e-05, "loss": 1.753508, "memory(GiB)": 24.5, "step": 35, "train_speed(iter/s)": 0.128886 }, { "acc": 0.57356367, "epoch": 0.45390070921985815, "grad_norm": 1.1508853435516357, "learning_rate": 9.735491524973722e-05, "loss": 1.75303555, "memory(GiB)": 24.5, "step": 40, "train_speed(iter/s)": 0.129863 }, { "acc": 0.55143838, "epoch": 0.5106382978723404, "grad_norm": 1.2086931467056274, "learning_rate": 9.62538603417229e-05, "loss": 1.85968819, "memory(GiB)": 25.91, "step": 45, "train_speed(iter/s)": 0.130489 }, { "acc": 0.55636163, "epoch": 0.5673758865248227, "grad_norm": 1.0430564880371094, "learning_rate": 9.497026257831855e-05, "loss": 1.7241251, "memory(GiB)": 25.91, "step": 50, "train_speed(iter/s)": 0.131006 }, { "epoch": 0.5673758865248227, "eval_acc": 0.5656192236598891, "eval_loss": 1.916869044303894, "eval_runtime": 2.4464, "eval_samples_per_second": 5.723, "eval_steps_per_second": 5.723, "step": 50 }, { "acc": 0.54153261, "epoch": 0.624113475177305, "grad_norm": 1.2167576551437378, "learning_rate": 9.35091877334763e-05, "loss": 1.82617283, "memory(GiB)": 26.3, "step": 55, "train_speed(iter/s)": 0.129776 }, { "acc": 0.56068554, "epoch": 0.6808510638297872, "grad_norm": 1.3103394508361816, "learning_rate": 9.18764020021071e-05, "loss": 1.67511196, "memory(GiB)": 26.3, "step": 60, "train_speed(iter/s)": 0.130279 }, { "acc": 0.58310304, "epoch": 0.7375886524822695, "grad_norm": 1.0143637657165527, "learning_rate": 9.007834924354383e-05, "loss": 1.67270927, "memory(GiB)": 26.3, "step": 65, "train_speed(iter/s)": 0.130674 }, { "acc": 0.57467747, "epoch": 0.7943262411347518, "grad_norm": 1.3850369453430176, "learning_rate": 8.81221255505724e-05, "loss": 1.70278339, "memory(GiB)": 26.3, "step": 70, "train_speed(iter/s)": 0.131012 }, { "acc": 0.5916081, "epoch": 0.851063829787234, "grad_norm": 1.0732088088989258, "learning_rate": 8.601545124439535e-05, "loss": 1.63483982, "memory(GiB)": 26.3, "step": 75, "train_speed(iter/s)": 0.131314 }, { "acc": 0.56617246, "epoch": 0.9078014184397163, "grad_norm": 1.084026575088501, "learning_rate": 8.376664040605122e-05, "loss": 1.73948345, "memory(GiB)": 26.3, "step": 80, "train_speed(iter/s)": 0.131575 }, { "acc": 0.54482946, "epoch": 0.9645390070921985, "grad_norm": 1.0596380233764648, "learning_rate": 8.138456806453503e-05, "loss": 1.84302387, "memory(GiB)": 26.3, "step": 85, "train_speed(iter/s)": 0.131746 }, { "acc": 0.60235672, "epoch": 1.0212765957446808, "grad_norm": 0.9688817858695984, "learning_rate": 7.887863517111338e-05, "loss": 1.53876371, "memory(GiB)": 26.3, "step": 90, "train_speed(iter/s)": 0.131937 }, { "acc": 0.61323328, "epoch": 1.0780141843971631, "grad_norm": 1.1411057710647583, "learning_rate": 7.62587314980648e-05, "loss": 1.45239391, "memory(GiB)": 26.3, "step": 95, "train_speed(iter/s)": 0.132127 }, { "acc": 0.59773045, "epoch": 1.1347517730496455, "grad_norm": 1.1101690530776978, "learning_rate": 7.353519660826665e-05, "loss": 1.54209366, "memory(GiB)": 26.3, "step": 100, "train_speed(iter/s)": 0.132289 }, { "epoch": 1.1347517730496455, "eval_acc": 0.5360443622920518, "eval_loss": 1.8874558210372925, "eval_runtime": 2.3721, "eval_samples_per_second": 5.902, "eval_steps_per_second": 5.902, "step": 100 }, { "acc": 0.61074128, "epoch": 1.1914893617021276, "grad_norm": 1.1275923252105713, "learning_rate": 7.071877904966423e-05, "loss": 1.45454111, "memory(GiB)": 26.3, "step": 105, "train_speed(iter/s)": 0.131567 }, { "acc": 0.60273228, "epoch": 1.24822695035461, "grad_norm": 1.3083913326263428, "learning_rate": 6.782059393566253e-05, "loss": 1.48250647, "memory(GiB)": 26.3, "step": 110, "train_speed(iter/s)": 0.13177 }, { "acc": 0.62370677, "epoch": 1.3049645390070923, "grad_norm": 1.0839005708694458, "learning_rate": 6.485207907885175e-05, "loss": 1.46015568, "memory(GiB)": 26.3, "step": 115, "train_speed(iter/s)": 0.131889 }, { "acc": 0.61979499, "epoch": 1.3617021276595744, "grad_norm": 1.2509405612945557, "learning_rate": 6.182494985118624e-05, "loss": 1.45815115, "memory(GiB)": 26.3, "step": 120, "train_speed(iter/s)": 0.132091 }, { "acc": 0.61155128, "epoch": 1.4184397163120568, "grad_norm": 1.4081703424453735, "learning_rate": 5.875115294876381e-05, "loss": 1.44713774, "memory(GiB)": 26.3, "step": 125, "train_speed(iter/s)": 0.132225 }, { "acc": 0.61849551, "epoch": 1.475177304964539, "grad_norm": 1.3654893636703491, "learning_rate": 5.564281924367408e-05, "loss": 1.39691505, "memory(GiB)": 26.3, "step": 130, "train_speed(iter/s)": 0.132372 }, { "acc": 0.62567844, "epoch": 1.5319148936170213, "grad_norm": 1.5169520378112793, "learning_rate": 5.2512215908988484e-05, "loss": 1.41729832, "memory(GiB)": 26.3, "step": 135, "train_speed(iter/s)": 0.132474 }, { "acc": 0.63134012, "epoch": 1.5886524822695036, "grad_norm": 1.4038567543029785, "learning_rate": 4.9371698005832365e-05, "loss": 1.37339115, "memory(GiB)": 26.3, "step": 140, "train_speed(iter/s)": 0.132568 }, { "acc": 0.61558661, "epoch": 1.645390070921986, "grad_norm": 1.513599157333374, "learning_rate": 4.623365972360337e-05, "loss": 1.44473677, "memory(GiB)": 26.3, "step": 145, "train_speed(iter/s)": 0.132637 }, { "acc": 0.62634563, "epoch": 1.702127659574468, "grad_norm": 1.3617616891860962, "learning_rate": 4.31104854657681e-05, "loss": 1.38092842, "memory(GiB)": 26.3, "step": 150, "train_speed(iter/s)": 0.13276 }, { "epoch": 1.702127659574468, "eval_acc": 0.5489833641404805, "eval_loss": 1.8454854488372803, "eval_runtime": 2.5064, "eval_samples_per_second": 5.586, "eval_steps_per_second": 5.586, "step": 150 } ], "logging_steps": 5, "max_steps": 264, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }