{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.978540772532189, "eval_steps": 500, "global_step": 348, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08583690987124463, "grad_norm": 5.144778728485107, "learning_rate": 2.9938918800982563e-05, "loss": 0.8269, "num_input_tokens_seen": 25888, "step": 10 }, { "epoch": 0.17167381974248927, "grad_norm": 4.5363545417785645, "learning_rate": 2.975617265898004e-05, "loss": 0.1558, "num_input_tokens_seen": 53280, "step": 20 }, { "epoch": 0.2575107296137339, "grad_norm": 3.198505401611328, "learning_rate": 2.9453249887788343e-05, "loss": 0.1655, "num_input_tokens_seen": 79904, "step": 30 }, { "epoch": 0.34334763948497854, "grad_norm": 3.3528692722320557, "learning_rate": 2.9032617538884018e-05, "loss": 0.1686, "num_input_tokens_seen": 106464, "step": 40 }, { "epoch": 0.4291845493562232, "grad_norm": 3.4683921337127686, "learning_rate": 2.84977013093626e-05, "loss": 0.128, "num_input_tokens_seen": 132672, "step": 50 }, { "epoch": 0.5150214592274678, "grad_norm": 6.693175792694092, "learning_rate": 2.7852857642513838e-05, "loss": 0.1581, "num_input_tokens_seen": 161312, "step": 60 }, { "epoch": 0.6008583690987125, "grad_norm": 1.5621659755706787, "learning_rate": 2.7103338248251055e-05, "loss": 0.1106, "num_input_tokens_seen": 187680, "step": 70 }, { "epoch": 0.6866952789699571, "grad_norm": 1.8518391847610474, "learning_rate": 2.6255247332346036e-05, "loss": 0.1401, "num_input_tokens_seen": 214400, "step": 80 }, { "epoch": 0.7725321888412017, "grad_norm": 2.3987910747528076, "learning_rate": 2.531549188280135e-05, "loss": 0.1282, "num_input_tokens_seen": 240480, "step": 90 }, { "epoch": 0.8583690987124464, "grad_norm": 3.301926612854004, "learning_rate": 2.4291725418235848e-05, "loss": 0.0936, "num_input_tokens_seen": 267360, "step": 100 }, { "epoch": 0.944206008583691, "grad_norm": 7.198034286499023, "learning_rate": 2.3192285656405456e-05, "loss": 0.1569, "num_input_tokens_seen": 294720, "step": 110 }, { "epoch": 1.0257510729613735, "grad_norm": 4.04252290725708, "learning_rate": 2.2026126610496852e-05, "loss": 0.0904, "num_input_tokens_seen": 319184, "step": 120 }, { "epoch": 1.111587982832618, "grad_norm": 5.508508682250977, "learning_rate": 2.0802745666212592e-05, "loss": 0.073, "num_input_tokens_seen": 345680, "step": 130 }, { "epoch": 1.1974248927038627, "grad_norm": 1.6325355768203735, "learning_rate": 1.953210623354359e-05, "loss": 0.058, "num_input_tokens_seen": 371824, "step": 140 }, { "epoch": 1.2832618025751072, "grad_norm": 1.3446625471115112, "learning_rate": 1.8224556603165363e-05, "loss": 0.0818, "num_input_tokens_seen": 398480, "step": 150 }, { "epoch": 1.3690987124463518, "grad_norm": 2.4645965099334717, "learning_rate": 1.689074566830434e-05, "loss": 0.0715, "num_input_tokens_seen": 424656, "step": 160 }, { "epoch": 1.4549356223175964, "grad_norm": 2.872417688369751, "learning_rate": 1.5541536198449044e-05, "loss": 0.0691, "num_input_tokens_seen": 451088, "step": 170 }, { "epoch": 1.5407725321888412, "grad_norm": 2.7769811153411865, "learning_rate": 1.4187916371218739e-05, "loss": 0.0713, "num_input_tokens_seen": 478320, "step": 180 }, { "epoch": 1.6266094420600858, "grad_norm": 3.642953634262085, "learning_rate": 1.2840910282888211e-05, "loss": 0.0985, "num_input_tokens_seen": 505328, "step": 190 }, { "epoch": 1.7124463519313304, "grad_norm": 4.070372104644775, "learning_rate": 1.1511488166385349e-05, "loss": 0.0728, "num_input_tokens_seen": 531920, "step": 200 }, { "epoch": 1.7982832618025753, "grad_norm": 6.3663740158081055, "learning_rate": 1.0210477047960303e-05, "loss": 0.1002, "num_input_tokens_seen": 560496, "step": 210 }, { "epoch": 1.8841201716738198, "grad_norm": 2.0767858028411865, "learning_rate": 8.948472570152874e-06, "loss": 0.0976, "num_input_tokens_seen": 586384, "step": 220 }, { "epoch": 1.9699570815450644, "grad_norm": 4.728202819824219, "learning_rate": 7.735752699185711e-06, "loss": 0.0801, "num_input_tokens_seen": 612496, "step": 230 }, { "epoch": 2.051502145922747, "grad_norm": 0.5934279561042786, "learning_rate": 6.582194019564266e-06, "loss": 0.0533, "num_input_tokens_seen": 637448, "step": 240 }, { "epoch": 2.1373390557939915, "grad_norm": 2.772380828857422, "learning_rate": 5.497191297593647e-06, "loss": 0.059, "num_input_tokens_seen": 663528, "step": 250 }, { "epoch": 2.223175965665236, "grad_norm": 1.757418155670166, "learning_rate": 4.4895809688998655e-06, "loss": 0.0505, "num_input_tokens_seen": 689384, "step": 260 }, { "epoch": 2.3090128755364807, "grad_norm": 2.6222424507141113, "learning_rate": 3.567569173085455e-06, "loss": 0.0404, "num_input_tokens_seen": 715560, "step": 270 }, { "epoch": 2.3948497854077253, "grad_norm": 2.2118000984191895, "learning_rate": 2.7386649216166233e-06, "loss": 0.059, "num_input_tokens_seen": 741960, "step": 280 }, { "epoch": 2.48068669527897, "grad_norm": 4.022334098815918, "learning_rate": 2.0096189432334194e-06, "loss": 0.0429, "num_input_tokens_seen": 768808, "step": 290 }, { "epoch": 2.5665236051502145, "grad_norm": 0.4254824221134186, "learning_rate": 1.3863687049356465e-06, "loss": 0.0341, "num_input_tokens_seen": 795176, "step": 300 }, { "epoch": 2.652360515021459, "grad_norm": 2.2265915870666504, "learning_rate": 8.7399005630238e-07, "loss": 0.0349, "num_input_tokens_seen": 821864, "step": 310 }, { "epoch": 2.7381974248927037, "grad_norm": 1.5945826768875122, "learning_rate": 4.766558909615504e-07, "loss": 0.0375, "num_input_tokens_seen": 849800, "step": 320 }, { "epoch": 2.8240343347639483, "grad_norm": 3.1935954093933105, "learning_rate": 1.9760216187710788e-07, "loss": 0.0184, "num_input_tokens_seen": 876616, "step": 330 }, { "epoch": 2.909871244635193, "grad_norm": 2.1143760681152344, "learning_rate": 3.910152723075322e-08, "loss": 0.0583, "num_input_tokens_seen": 903048, "step": 340 }, { "epoch": 2.978540772532189, "num_input_tokens_seen": 924488, "step": 348, "total_flos": 3.957609622319923e+16, "train_loss": 0.10672961984728944, "train_runtime": 329.4461, "train_samples_per_second": 8.46, "train_steps_per_second": 1.056 } ], "logging_steps": 10, "max_steps": 348, "num_input_tokens_seen": 924488, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.957609622319923e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }