| { | |
| "best_metric": 1.84548545, | |
| "best_model_checkpoint": "/public1/home/stu52275901020/swift/model_output/\u9ad8\u795e\u7ecf\u8d28/internvl2-8b/v36-20241204-000354/checkpoint-150", | |
| "epoch": 1.702127659574468, | |
| "eval_steps": 50, | |
| "global_step": 150, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "acc": 0.4818837, | |
| "epoch": 0.011347517730496455, | |
| "grad_norm": 1.6654787063598633, | |
| "learning_rate": 7.142857142857143e-06, | |
| "loss": 2.21617293, | |
| "memory(GiB)": 21.82, | |
| "step": 1, | |
| "train_speed(iter/s)": 0.047704 | |
| }, | |
| { | |
| "acc": 0.50631422, | |
| "epoch": 0.05673758865248227, | |
| "grad_norm": 1.5600533485412598, | |
| "learning_rate": 3.571428571428572e-05, | |
| "loss": 2.20940375, | |
| "memory(GiB)": 23.14, | |
| "step": 5, | |
| "train_speed(iter/s)": 0.099488 | |
| }, | |
| { | |
| "acc": 0.51391821, | |
| "epoch": 0.11347517730496454, | |
| "grad_norm": 1.7131439447402954, | |
| "learning_rate": 7.142857142857143e-05, | |
| "loss": 2.04298916, | |
| "memory(GiB)": 23.14, | |
| "step": 10, | |
| "train_speed(iter/s)": 0.114765 | |
| }, | |
| { | |
| "acc": 0.55212674, | |
| "epoch": 0.1702127659574468, | |
| "grad_norm": 1.686623215675354, | |
| "learning_rate": 9.999605221019081e-05, | |
| "loss": 1.88184052, | |
| "memory(GiB)": 24.5, | |
| "step": 15, | |
| "train_speed(iter/s)": 0.12107 | |
| }, | |
| { | |
| "acc": 0.53304815, | |
| "epoch": 0.22695035460992907, | |
| "grad_norm": 1.0942801237106323, | |
| "learning_rate": 9.98579450130307e-05, | |
| "loss": 1.94336758, | |
| "memory(GiB)": 24.5, | |
| "step": 20, | |
| "train_speed(iter/s)": 0.124439 | |
| }, | |
| { | |
| "acc": 0.56079154, | |
| "epoch": 0.28368794326241137, | |
| "grad_norm": 0.9606024622917175, | |
| "learning_rate": 9.952307128483256e-05, | |
| "loss": 1.77457695, | |
| "memory(GiB)": 24.5, | |
| "step": 25, | |
| "train_speed(iter/s)": 0.126437 | |
| }, | |
| { | |
| "acc": 0.55825844, | |
| "epoch": 0.3404255319148936, | |
| "grad_norm": 0.922258198261261, | |
| "learning_rate": 9.899275261921234e-05, | |
| "loss": 1.84706001, | |
| "memory(GiB)": 24.5, | |
| "step": 30, | |
| "train_speed(iter/s)": 0.127904 | |
| }, | |
| { | |
| "acc": 0.56207647, | |
| "epoch": 0.3971631205673759, | |
| "grad_norm": 1.0378409624099731, | |
| "learning_rate": 9.82690819416637e-05, | |
| "loss": 1.753508, | |
| "memory(GiB)": 24.5, | |
| "step": 35, | |
| "train_speed(iter/s)": 0.128886 | |
| }, | |
| { | |
| "acc": 0.57356367, | |
| "epoch": 0.45390070921985815, | |
| "grad_norm": 1.1508853435516357, | |
| "learning_rate": 9.735491524973722e-05, | |
| "loss": 1.75303555, | |
| "memory(GiB)": 24.5, | |
| "step": 40, | |
| "train_speed(iter/s)": 0.129863 | |
| }, | |
| { | |
| "acc": 0.55143838, | |
| "epoch": 0.5106382978723404, | |
| "grad_norm": 1.2086931467056274, | |
| "learning_rate": 9.62538603417229e-05, | |
| "loss": 1.85968819, | |
| "memory(GiB)": 25.91, | |
| "step": 45, | |
| "train_speed(iter/s)": 0.130489 | |
| }, | |
| { | |
| "acc": 0.55636163, | |
| "epoch": 0.5673758865248227, | |
| "grad_norm": 1.0430564880371094, | |
| "learning_rate": 9.497026257831855e-05, | |
| "loss": 1.7241251, | |
| "memory(GiB)": 25.91, | |
| "step": 50, | |
| "train_speed(iter/s)": 0.131006 | |
| }, | |
| { | |
| "epoch": 0.5673758865248227, | |
| "eval_acc": 0.5656192236598891, | |
| "eval_loss": 1.916869044303894, | |
| "eval_runtime": 2.4464, | |
| "eval_samples_per_second": 5.723, | |
| "eval_steps_per_second": 5.723, | |
| "step": 50 | |
| }, | |
| { | |
| "acc": 0.54153261, | |
| "epoch": 0.624113475177305, | |
| "grad_norm": 1.2167576551437378, | |
| "learning_rate": 9.35091877334763e-05, | |
| "loss": 1.82617283, | |
| "memory(GiB)": 26.3, | |
| "step": 55, | |
| "train_speed(iter/s)": 0.129776 | |
| }, | |
| { | |
| "acc": 0.56068554, | |
| "epoch": 0.6808510638297872, | |
| "grad_norm": 1.3103394508361816, | |
| "learning_rate": 9.18764020021071e-05, | |
| "loss": 1.67511196, | |
| "memory(GiB)": 26.3, | |
| "step": 60, | |
| "train_speed(iter/s)": 0.130279 | |
| }, | |
| { | |
| "acc": 0.58310304, | |
| "epoch": 0.7375886524822695, | |
| "grad_norm": 1.0143637657165527, | |
| "learning_rate": 9.007834924354383e-05, | |
| "loss": 1.67270927, | |
| "memory(GiB)": 26.3, | |
| "step": 65, | |
| "train_speed(iter/s)": 0.130674 | |
| }, | |
| { | |
| "acc": 0.57467747, | |
| "epoch": 0.7943262411347518, | |
| "grad_norm": 1.3850369453430176, | |
| "learning_rate": 8.81221255505724e-05, | |
| "loss": 1.70278339, | |
| "memory(GiB)": 26.3, | |
| "step": 70, | |
| "train_speed(iter/s)": 0.131012 | |
| }, | |
| { | |
| "acc": 0.5916081, | |
| "epoch": 0.851063829787234, | |
| "grad_norm": 1.0732088088989258, | |
| "learning_rate": 8.601545124439535e-05, | |
| "loss": 1.63483982, | |
| "memory(GiB)": 26.3, | |
| "step": 75, | |
| "train_speed(iter/s)": 0.131314 | |
| }, | |
| { | |
| "acc": 0.56617246, | |
| "epoch": 0.9078014184397163, | |
| "grad_norm": 1.084026575088501, | |
| "learning_rate": 8.376664040605122e-05, | |
| "loss": 1.73948345, | |
| "memory(GiB)": 26.3, | |
| "step": 80, | |
| "train_speed(iter/s)": 0.131575 | |
| }, | |
| { | |
| "acc": 0.54482946, | |
| "epoch": 0.9645390070921985, | |
| "grad_norm": 1.0596380233764648, | |
| "learning_rate": 8.138456806453503e-05, | |
| "loss": 1.84302387, | |
| "memory(GiB)": 26.3, | |
| "step": 85, | |
| "train_speed(iter/s)": 0.131746 | |
| }, | |
| { | |
| "acc": 0.60235672, | |
| "epoch": 1.0212765957446808, | |
| "grad_norm": 0.9688817858695984, | |
| "learning_rate": 7.887863517111338e-05, | |
| "loss": 1.53876371, | |
| "memory(GiB)": 26.3, | |
| "step": 90, | |
| "train_speed(iter/s)": 0.131937 | |
| }, | |
| { | |
| "acc": 0.61323328, | |
| "epoch": 1.0780141843971631, | |
| "grad_norm": 1.1411057710647583, | |
| "learning_rate": 7.62587314980648e-05, | |
| "loss": 1.45239391, | |
| "memory(GiB)": 26.3, | |
| "step": 95, | |
| "train_speed(iter/s)": 0.132127 | |
| }, | |
| { | |
| "acc": 0.59773045, | |
| "epoch": 1.1347517730496455, | |
| "grad_norm": 1.1101690530776978, | |
| "learning_rate": 7.353519660826665e-05, | |
| "loss": 1.54209366, | |
| "memory(GiB)": 26.3, | |
| "step": 100, | |
| "train_speed(iter/s)": 0.132289 | |
| }, | |
| { | |
| "epoch": 1.1347517730496455, | |
| "eval_acc": 0.5360443622920518, | |
| "eval_loss": 1.8874558210372925, | |
| "eval_runtime": 2.3721, | |
| "eval_samples_per_second": 5.902, | |
| "eval_steps_per_second": 5.902, | |
| "step": 100 | |
| }, | |
| { | |
| "acc": 0.61074128, | |
| "epoch": 1.1914893617021276, | |
| "grad_norm": 1.1275923252105713, | |
| "learning_rate": 7.071877904966423e-05, | |
| "loss": 1.45454111, | |
| "memory(GiB)": 26.3, | |
| "step": 105, | |
| "train_speed(iter/s)": 0.131567 | |
| }, | |
| { | |
| "acc": 0.60273228, | |
| "epoch": 1.24822695035461, | |
| "grad_norm": 1.3083913326263428, | |
| "learning_rate": 6.782059393566253e-05, | |
| "loss": 1.48250647, | |
| "memory(GiB)": 26.3, | |
| "step": 110, | |
| "train_speed(iter/s)": 0.13177 | |
| }, | |
| { | |
| "acc": 0.62370677, | |
| "epoch": 1.3049645390070923, | |
| "grad_norm": 1.0839005708694458, | |
| "learning_rate": 6.485207907885175e-05, | |
| "loss": 1.46015568, | |
| "memory(GiB)": 26.3, | |
| "step": 115, | |
| "train_speed(iter/s)": 0.131889 | |
| }, | |
| { | |
| "acc": 0.61979499, | |
| "epoch": 1.3617021276595744, | |
| "grad_norm": 1.2509405612945557, | |
| "learning_rate": 6.182494985118624e-05, | |
| "loss": 1.45815115, | |
| "memory(GiB)": 26.3, | |
| "step": 120, | |
| "train_speed(iter/s)": 0.132091 | |
| }, | |
| { | |
| "acc": 0.61155128, | |
| "epoch": 1.4184397163120568, | |
| "grad_norm": 1.4081703424453735, | |
| "learning_rate": 5.875115294876381e-05, | |
| "loss": 1.44713774, | |
| "memory(GiB)": 26.3, | |
| "step": 125, | |
| "train_speed(iter/s)": 0.132225 | |
| }, | |
| { | |
| "acc": 0.61849551, | |
| "epoch": 1.475177304964539, | |
| "grad_norm": 1.3654893636703491, | |
| "learning_rate": 5.564281924367408e-05, | |
| "loss": 1.39691505, | |
| "memory(GiB)": 26.3, | |
| "step": 130, | |
| "train_speed(iter/s)": 0.132372 | |
| }, | |
| { | |
| "acc": 0.62567844, | |
| "epoch": 1.5319148936170213, | |
| "grad_norm": 1.5169520378112793, | |
| "learning_rate": 5.2512215908988484e-05, | |
| "loss": 1.41729832, | |
| "memory(GiB)": 26.3, | |
| "step": 135, | |
| "train_speed(iter/s)": 0.132474 | |
| }, | |
| { | |
| "acc": 0.63134012, | |
| "epoch": 1.5886524822695036, | |
| "grad_norm": 1.4038567543029785, | |
| "learning_rate": 4.9371698005832365e-05, | |
| "loss": 1.37339115, | |
| "memory(GiB)": 26.3, | |
| "step": 140, | |
| "train_speed(iter/s)": 0.132568 | |
| }, | |
| { | |
| "acc": 0.61558661, | |
| "epoch": 1.645390070921986, | |
| "grad_norm": 1.513599157333374, | |
| "learning_rate": 4.623365972360337e-05, | |
| "loss": 1.44473677, | |
| "memory(GiB)": 26.3, | |
| "step": 145, | |
| "train_speed(iter/s)": 0.132637 | |
| }, | |
| { | |
| "acc": 0.62634563, | |
| "epoch": 1.702127659574468, | |
| "grad_norm": 1.3617616891860962, | |
| "learning_rate": 4.31104854657681e-05, | |
| "loss": 1.38092842, | |
| "memory(GiB)": 26.3, | |
| "step": 150, | |
| "train_speed(iter/s)": 0.13276 | |
| }, | |
| { | |
| "epoch": 1.702127659574468, | |
| "eval_acc": 0.5489833641404805, | |
| "eval_loss": 1.8454854488372803, | |
| "eval_runtime": 2.5064, | |
| "eval_samples_per_second": 5.586, | |
| "eval_steps_per_second": 5.586, | |
| "step": 150 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 264, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |