{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 32740,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15271838729383017,
      "grad_norm": 18.494943618774414,
      "learning_rate": 1.9694563225412342e-05,
      "loss": 0.5789,
      "step": 500
    },
    {
      "epoch": 0.30543677458766033,
      "grad_norm": 8.591017723083496,
      "learning_rate": 1.938912645082468e-05,
      "loss": 0.4508,
      "step": 1000
    },
    {
      "epoch": 0.4581551618814905,
      "grad_norm": 8.066777229309082,
      "learning_rate": 1.9083689676237022e-05,
      "loss": 0.4167,
      "step": 1500
    },
    {
      "epoch": 0.6108735491753207,
      "grad_norm": 11.628117561340332,
      "learning_rate": 1.8778252901649362e-05,
      "loss": 0.4007,
      "step": 2000
    },
    {
      "epoch": 0.7635919364691509,
      "grad_norm": 6.132204055786133,
      "learning_rate": 1.84728161270617e-05,
      "loss": 0.3831,
      "step": 2500
    },
    {
      "epoch": 0.916310323762981,
      "grad_norm": 6.5475640296936035,
      "learning_rate": 1.816737935247404e-05,
      "loss": 0.3605,
      "step": 3000
    },
    {
      "epoch": 1.0690287110568113,
      "grad_norm": 4.849888801574707,
      "learning_rate": 1.786194257788638e-05,
      "loss": 0.3139,
      "step": 3500
    },
    {
      "epoch": 1.2217470983506413,
      "grad_norm": 12.288202285766602,
      "learning_rate": 1.755650580329872e-05,
      "loss": 0.2494,
      "step": 4000
    },
    {
      "epoch": 1.3744654856444716,
      "grad_norm": 19.027870178222656,
      "learning_rate": 1.725106902871106e-05,
      "loss": 0.2561,
      "step": 4500
    },
    {
      "epoch": 1.5271838729383018,
      "grad_norm": 12.914681434631348,
      "learning_rate": 1.69456322541234e-05,
      "loss": 0.2464,
      "step": 5000
    },
    {
      "epoch": 1.679902260232132,
      "grad_norm": 8.226604461669922,
      "learning_rate": 1.6640195479535736e-05,
      "loss": 0.2599,
      "step": 5500
    },
    {
      "epoch": 1.832620647525962,
      "grad_norm": 14.606645584106445,
      "learning_rate": 1.6334758704948076e-05,
      "loss": 0.2608,
      "step": 6000
    },
    {
      "epoch": 1.9853390348197923,
      "grad_norm": 15.511763572692871,
      "learning_rate": 1.6029321930360416e-05,
      "loss": 0.2541,
      "step": 6500
    },
    {
      "epoch": 2.1380574221136226,
      "grad_norm": 16.74727439880371,
      "learning_rate": 1.5723885155772757e-05,
      "loss": 0.1556,
      "step": 7000
    },
    {
      "epoch": 2.2907758094074526,
      "grad_norm": 20.485782623291016,
      "learning_rate": 1.5418448381185097e-05,
      "loss": 0.153,
      "step": 7500
    },
    {
      "epoch": 2.4434941967012827,
      "grad_norm": 7.16798734664917,
      "learning_rate": 1.5113011606597437e-05,
      "loss": 0.1579,
      "step": 8000
    },
    {
      "epoch": 2.596212583995113,
      "grad_norm": 7.562324523925781,
      "learning_rate": 1.4807574832009775e-05,
      "loss": 0.1616,
      "step": 8500
    },
    {
      "epoch": 2.748930971288943,
      "grad_norm": 16.156944274902344,
      "learning_rate": 1.4502138057422115e-05,
      "loss": 0.1658,
      "step": 9000
    },
    {
      "epoch": 2.901649358582773,
      "grad_norm": 13.200675964355469,
      "learning_rate": 1.4196701282834456e-05,
      "loss": 0.1657,
      "step": 9500
    },
    {
      "epoch": 3.0543677458766036,
      "grad_norm": 1.1726553440093994,
      "learning_rate": 1.3891264508246794e-05,
      "loss": 0.1469,
      "step": 10000
    },
    {
      "epoch": 3.2070861331704337,
      "grad_norm": 30.926372528076172,
      "learning_rate": 1.3585827733659134e-05,
      "loss": 0.1103,
      "step": 10500
    },
    {
      "epoch": 3.359804520464264,
      "grad_norm": 11.862848281860352,
      "learning_rate": 1.3280390959071474e-05,
      "loss": 0.1011,
      "step": 11000
    },
    {
      "epoch": 3.512522907758094,
      "grad_norm": 7.604775428771973,
      "learning_rate": 1.2974954184483813e-05,
      "loss": 0.1035,
      "step": 11500
    },
    {
      "epoch": 3.665241295051924,
      "grad_norm": 1.4278388023376465,
      "learning_rate": 1.2669517409896153e-05,
      "loss": 0.1145,
      "step": 12000
    },
    {
      "epoch": 3.8179596823457542,
      "grad_norm": 0.45902055501937866,
      "learning_rate": 1.2364080635308493e-05,
      "loss": 0.1138,
      "step": 12500
    },
    {
      "epoch": 3.9706780696395847,
      "grad_norm": 31.397523880004883,
      "learning_rate": 1.2058643860720831e-05,
      "loss": 0.1101,
      "step": 13000
    },
    {
      "epoch": 4.123396456933415,
      "grad_norm": 0.23616579174995422,
      "learning_rate": 1.1753207086133171e-05,
      "loss": 0.0771,
      "step": 13500
    },
    {
      "epoch": 4.276114844227245,
      "grad_norm": 0.22658583521842957,
      "learning_rate": 1.1447770311545512e-05,
      "loss": 0.0693,
      "step": 14000
    },
    {
      "epoch": 4.428833231521075,
      "grad_norm": 0.35704877972602844,
      "learning_rate": 1.114233353695785e-05,
      "loss": 0.0751,
      "step": 14500
    },
    {
      "epoch": 4.581551618814905,
      "grad_norm": 0.17207038402557373,
      "learning_rate": 1.083689676237019e-05,
      "loss": 0.0699,
      "step": 15000
    },
    {
      "epoch": 4.734270006108735,
      "grad_norm": 8.590784072875977,
      "learning_rate": 1.053145998778253e-05,
      "loss": 0.0692,
      "step": 15500
    },
    {
      "epoch": 4.886988393402565,
      "grad_norm": 25.48099708557129,
      "learning_rate": 1.0226023213194869e-05,
      "loss": 0.0727,
      "step": 16000
    },
    {
      "epoch": 5.039706780696396,
      "grad_norm": 4.0271501541137695,
      "learning_rate": 9.920586438607209e-06,
      "loss": 0.0659,
      "step": 16500
    },
    {
      "epoch": 5.192425167990226,
      "grad_norm": 1.6429097652435303,
      "learning_rate": 9.615149664019549e-06,
      "loss": 0.0508,
      "step": 17000
    },
    {
      "epoch": 5.345143555284056,
      "grad_norm": 0.03841910511255264,
      "learning_rate": 9.309712889431889e-06,
      "loss": 0.0462,
      "step": 17500
    },
    {
      "epoch": 5.497861942577886,
      "grad_norm": 0.03901192545890808,
      "learning_rate": 9.004276114844227e-06,
      "loss": 0.0508,
      "step": 18000
    },
    {
      "epoch": 5.650580329871716,
      "grad_norm": 24.19868278503418,
      "learning_rate": 8.698839340256568e-06,
      "loss": 0.0454,
      "step": 18500
    },
    {
      "epoch": 5.803298717165546,
      "grad_norm": 12.015921592712402,
      "learning_rate": 8.393402565668908e-06,
      "loss": 0.0525,
      "step": 19000
    },
    {
      "epoch": 5.956017104459377,
      "grad_norm": 0.1184128075838089,
      "learning_rate": 8.087965791081248e-06,
      "loss": 0.0524,
      "step": 19500
    },
    {
      "epoch": 6.108735491753207,
      "grad_norm": 53.9754524230957,
      "learning_rate": 7.782529016493586e-06,
      "loss": 0.0431,
      "step": 20000
    },
    {
      "epoch": 6.261453879047037,
      "grad_norm": 0.049675118178129196,
      "learning_rate": 7.4770922419059255e-06,
      "loss": 0.0311,
      "step": 20500
    },
    {
      "epoch": 6.414172266340867,
      "grad_norm": 0.12765900790691376,
      "learning_rate": 7.171655467318266e-06,
      "loss": 0.0303,
      "step": 21000
    },
    {
      "epoch": 6.566890653634697,
      "grad_norm": 0.04678593948483467,
      "learning_rate": 6.866218692730605e-06,
      "loss": 0.0315,
      "step": 21500
    },
    {
      "epoch": 6.719609040928528,
      "grad_norm": 33.2860221862793,
      "learning_rate": 6.560781918142944e-06,
      "loss": 0.0342,
      "step": 22000
    },
    {
      "epoch": 6.872327428222358,
      "grad_norm": 0.022564252838492393,
      "learning_rate": 6.255345143555285e-06,
      "loss": 0.0325,
      "step": 22500
    },
    {
      "epoch": 7.025045815516188,
      "grad_norm": 48.42949676513672,
      "learning_rate": 5.949908368967624e-06,
      "loss": 0.0281,
      "step": 23000
    },
    {
      "epoch": 7.177764202810018,
      "grad_norm": 0.1493069976568222,
      "learning_rate": 5.644471594379963e-06,
      "loss": 0.0255,
      "step": 23500
    },
    {
      "epoch": 7.330482590103848,
      "grad_norm": 0.02931222692131996,
      "learning_rate": 5.339034819792304e-06,
      "loss": 0.0218,
      "step": 24000
    },
    {
      "epoch": 7.483200977397678,
      "grad_norm": 99.97526550292969,
      "learning_rate": 5.033598045204643e-06,
      "loss": 0.0234,
      "step": 24500
    },
    {
      "epoch": 7.6359193646915084,
      "grad_norm": 1.244328260421753,
      "learning_rate": 4.728161270616982e-06,
      "loss": 0.0248,
      "step": 25000
    },
    {
      "epoch": 7.788637751985339,
      "grad_norm": 0.01413074042648077,
      "learning_rate": 4.4227244960293225e-06,
      "loss": 0.02,
      "step": 25500
    },
    {
      "epoch": 7.941356139279169,
      "grad_norm": 0.007764923386275768,
      "learning_rate": 4.117287721441662e-06,
      "loss": 0.0242,
      "step": 26000
    },
    {
      "epoch": 8.094074526573,
      "grad_norm": 0.04147506132721901,
      "learning_rate": 3.8118509468540015e-06,
      "loss": 0.0175,
      "step": 26500
    },
    {
      "epoch": 8.24679291386683,
      "grad_norm": 41.88145446777344,
      "learning_rate": 3.506414172266341e-06,
      "loss": 0.0177,
      "step": 27000
    },
    {
      "epoch": 8.39951130116066,
      "grad_norm": 0.005077702924609184,
      "learning_rate": 3.2009773976786805e-06,
      "loss": 0.019,
      "step": 27500
    },
    {
      "epoch": 8.55222968845449,
      "grad_norm": 87.5268325805664,
      "learning_rate": 2.8955406230910206e-06,
      "loss": 0.013,
      "step": 28000
    },
    {
      "epoch": 8.70494807574832,
      "grad_norm": 0.03963172435760498,
      "learning_rate": 2.5901038485033603e-06,
      "loss": 0.0152,
      "step": 28500
    },
    {
      "epoch": 8.85766646304215,
      "grad_norm": 0.0064314561896026134,
      "learning_rate": 2.2846670739156996e-06,
      "loss": 0.0171,
      "step": 29000
    },
    {
      "epoch": 9.01038485033598,
      "grad_norm": 0.0032072309404611588,
      "learning_rate": 1.9792302993280393e-06,
      "loss": 0.0161,
      "step": 29500
    },
    {
      "epoch": 9.16310323762981,
      "grad_norm": 0.016286134719848633,
      "learning_rate": 1.6737935247403788e-06,
      "loss": 0.0117,
      "step": 30000
    },
    {
      "epoch": 9.315821624923641,
      "grad_norm": 0.04490550234913826,
      "learning_rate": 1.3683567501527185e-06,
      "loss": 0.0122,
      "step": 30500
    },
    {
      "epoch": 9.46854001221747,
      "grad_norm": 0.030366964638233185,
      "learning_rate": 1.0629199755650582e-06,
      "loss": 0.0076,
      "step": 31000
    },
    {
      "epoch": 9.621258399511301,
      "grad_norm": 0.026300914585590363,
      "learning_rate": 7.574832009773978e-07,
      "loss": 0.0104,
      "step": 31500
    },
    {
      "epoch": 9.77397678680513,
      "grad_norm": 0.0014681548345834017,
      "learning_rate": 4.5204642638973736e-07,
      "loss": 0.0107,
      "step": 32000
    },
    {
      "epoch": 9.926695174098962,
      "grad_norm": 0.0018271030858159065,
      "learning_rate": 1.4660965180207698e-07,
      "loss": 0.0128,
      "step": 32500
    },
    {
      "epoch": 10.0,
      "step": 32740,
      "total_flos": 7.032286628473344e+16,
      "train_loss": 0.11521797929512122,
      "train_runtime": 5803.3368,
      "train_samples_per_second": 180.488,
      "train_steps_per_second": 5.642
    }
  ],
  "logging_steps": 500,
  "max_steps": 32740,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.032286628473344e+16,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}