| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 32740, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.15271838729383017, |
| "grad_norm": 18.494943618774414, |
| "learning_rate": 1.9694563225412342e-05, |
| "loss": 0.5789, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.30543677458766033, |
| "grad_norm": 8.591017723083496, |
| "learning_rate": 1.938912645082468e-05, |
| "loss": 0.4508, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4581551618814905, |
| "grad_norm": 8.066777229309082, |
| "learning_rate": 1.9083689676237022e-05, |
| "loss": 0.4167, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6108735491753207, |
| "grad_norm": 11.628117561340332, |
| "learning_rate": 1.8778252901649362e-05, |
| "loss": 0.4007, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7635919364691509, |
| "grad_norm": 6.132204055786133, |
| "learning_rate": 1.84728161270617e-05, |
| "loss": 0.3831, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.916310323762981, |
| "grad_norm": 6.5475640296936035, |
| "learning_rate": 1.816737935247404e-05, |
| "loss": 0.3605, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.0690287110568113, |
| "grad_norm": 4.849888801574707, |
| "learning_rate": 1.786194257788638e-05, |
| "loss": 0.3139, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.2217470983506413, |
| "grad_norm": 12.288202285766602, |
| "learning_rate": 1.755650580329872e-05, |
| "loss": 0.2494, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.3744654856444716, |
| "grad_norm": 19.027870178222656, |
| "learning_rate": 1.725106902871106e-05, |
| "loss": 0.2561, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.5271838729383018, |
| "grad_norm": 12.914681434631348, |
| "learning_rate": 1.69456322541234e-05, |
| "loss": 0.2464, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.679902260232132, |
| "grad_norm": 8.226604461669922, |
| "learning_rate": 1.6640195479535736e-05, |
| "loss": 0.2599, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.832620647525962, |
| "grad_norm": 14.606645584106445, |
| "learning_rate": 1.6334758704948076e-05, |
| "loss": 0.2608, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.9853390348197923, |
| "grad_norm": 15.511763572692871, |
| "learning_rate": 1.6029321930360416e-05, |
| "loss": 0.2541, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.1380574221136226, |
| "grad_norm": 16.74727439880371, |
| "learning_rate": 1.5723885155772757e-05, |
| "loss": 0.1556, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.2907758094074526, |
| "grad_norm": 20.485782623291016, |
| "learning_rate": 1.5418448381185097e-05, |
| "loss": 0.153, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.4434941967012827, |
| "grad_norm": 7.16798734664917, |
| "learning_rate": 1.5113011606597437e-05, |
| "loss": 0.1579, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.596212583995113, |
| "grad_norm": 7.562324523925781, |
| "learning_rate": 1.4807574832009775e-05, |
| "loss": 0.1616, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.748930971288943, |
| "grad_norm": 16.156944274902344, |
| "learning_rate": 1.4502138057422115e-05, |
| "loss": 0.1658, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.901649358582773, |
| "grad_norm": 13.200675964355469, |
| "learning_rate": 1.4196701282834456e-05, |
| "loss": 0.1657, |
| "step": 9500 |
| }, |
| { |
| "epoch": 3.0543677458766036, |
| "grad_norm": 1.1726553440093994, |
| "learning_rate": 1.3891264508246794e-05, |
| "loss": 0.1469, |
| "step": 10000 |
| }, |
| { |
| "epoch": 3.2070861331704337, |
| "grad_norm": 30.926372528076172, |
| "learning_rate": 1.3585827733659134e-05, |
| "loss": 0.1103, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.359804520464264, |
| "grad_norm": 11.862848281860352, |
| "learning_rate": 1.3280390959071474e-05, |
| "loss": 0.1011, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.512522907758094, |
| "grad_norm": 7.604775428771973, |
| "learning_rate": 1.2974954184483813e-05, |
| "loss": 0.1035, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.665241295051924, |
| "grad_norm": 1.4278388023376465, |
| "learning_rate": 1.2669517409896153e-05, |
| "loss": 0.1145, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.8179596823457542, |
| "grad_norm": 0.45902055501937866, |
| "learning_rate": 1.2364080635308493e-05, |
| "loss": 0.1138, |
| "step": 12500 |
| }, |
| { |
| "epoch": 3.9706780696395847, |
| "grad_norm": 31.397523880004883, |
| "learning_rate": 1.2058643860720831e-05, |
| "loss": 0.1101, |
| "step": 13000 |
| }, |
| { |
| "epoch": 4.123396456933415, |
| "grad_norm": 0.23616579174995422, |
| "learning_rate": 1.1753207086133171e-05, |
| "loss": 0.0771, |
| "step": 13500 |
| }, |
| { |
| "epoch": 4.276114844227245, |
| "grad_norm": 0.22658583521842957, |
| "learning_rate": 1.1447770311545512e-05, |
| "loss": 0.0693, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.428833231521075, |
| "grad_norm": 0.35704877972602844, |
| "learning_rate": 1.114233353695785e-05, |
| "loss": 0.0751, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.581551618814905, |
| "grad_norm": 0.17207038402557373, |
| "learning_rate": 1.083689676237019e-05, |
| "loss": 0.0699, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.734270006108735, |
| "grad_norm": 8.590784072875977, |
| "learning_rate": 1.053145998778253e-05, |
| "loss": 0.0692, |
| "step": 15500 |
| }, |
| { |
| "epoch": 4.886988393402565, |
| "grad_norm": 25.48099708557129, |
| "learning_rate": 1.0226023213194869e-05, |
| "loss": 0.0727, |
| "step": 16000 |
| }, |
| { |
| "epoch": 5.039706780696396, |
| "grad_norm": 4.0271501541137695, |
| "learning_rate": 9.920586438607209e-06, |
| "loss": 0.0659, |
| "step": 16500 |
| }, |
| { |
| "epoch": 5.192425167990226, |
| "grad_norm": 1.6429097652435303, |
| "learning_rate": 9.615149664019549e-06, |
| "loss": 0.0508, |
| "step": 17000 |
| }, |
| { |
| "epoch": 5.345143555284056, |
| "grad_norm": 0.03841910511255264, |
| "learning_rate": 9.309712889431889e-06, |
| "loss": 0.0462, |
| "step": 17500 |
| }, |
| { |
| "epoch": 5.497861942577886, |
| "grad_norm": 0.03901192545890808, |
| "learning_rate": 9.004276114844227e-06, |
| "loss": 0.0508, |
| "step": 18000 |
| }, |
| { |
| "epoch": 5.650580329871716, |
| "grad_norm": 24.19868278503418, |
| "learning_rate": 8.698839340256568e-06, |
| "loss": 0.0454, |
| "step": 18500 |
| }, |
| { |
| "epoch": 5.803298717165546, |
| "grad_norm": 12.015921592712402, |
| "learning_rate": 8.393402565668908e-06, |
| "loss": 0.0525, |
| "step": 19000 |
| }, |
| { |
| "epoch": 5.956017104459377, |
| "grad_norm": 0.1184128075838089, |
| "learning_rate": 8.087965791081248e-06, |
| "loss": 0.0524, |
| "step": 19500 |
| }, |
| { |
| "epoch": 6.108735491753207, |
| "grad_norm": 53.9754524230957, |
| "learning_rate": 7.782529016493586e-06, |
| "loss": 0.0431, |
| "step": 20000 |
| }, |
| { |
| "epoch": 6.261453879047037, |
| "grad_norm": 0.049675118178129196, |
| "learning_rate": 7.4770922419059255e-06, |
| "loss": 0.0311, |
| "step": 20500 |
| }, |
| { |
| "epoch": 6.414172266340867, |
| "grad_norm": 0.12765900790691376, |
| "learning_rate": 7.171655467318266e-06, |
| "loss": 0.0303, |
| "step": 21000 |
| }, |
| { |
| "epoch": 6.566890653634697, |
| "grad_norm": 0.04678593948483467, |
| "learning_rate": 6.866218692730605e-06, |
| "loss": 0.0315, |
| "step": 21500 |
| }, |
| { |
| "epoch": 6.719609040928528, |
| "grad_norm": 33.2860221862793, |
| "learning_rate": 6.560781918142944e-06, |
| "loss": 0.0342, |
| "step": 22000 |
| }, |
| { |
| "epoch": 6.872327428222358, |
| "grad_norm": 0.022564252838492393, |
| "learning_rate": 6.255345143555285e-06, |
| "loss": 0.0325, |
| "step": 22500 |
| }, |
| { |
| "epoch": 7.025045815516188, |
| "grad_norm": 48.42949676513672, |
| "learning_rate": 5.949908368967624e-06, |
| "loss": 0.0281, |
| "step": 23000 |
| }, |
| { |
| "epoch": 7.177764202810018, |
| "grad_norm": 0.1493069976568222, |
| "learning_rate": 5.644471594379963e-06, |
| "loss": 0.0255, |
| "step": 23500 |
| }, |
| { |
| "epoch": 7.330482590103848, |
| "grad_norm": 0.02931222692131996, |
| "learning_rate": 5.339034819792304e-06, |
| "loss": 0.0218, |
| "step": 24000 |
| }, |
| { |
| "epoch": 7.483200977397678, |
| "grad_norm": 99.97526550292969, |
| "learning_rate": 5.033598045204643e-06, |
| "loss": 0.0234, |
| "step": 24500 |
| }, |
| { |
| "epoch": 7.6359193646915084, |
| "grad_norm": 1.244328260421753, |
| "learning_rate": 4.728161270616982e-06, |
| "loss": 0.0248, |
| "step": 25000 |
| }, |
| { |
| "epoch": 7.788637751985339, |
| "grad_norm": 0.01413074042648077, |
| "learning_rate": 4.4227244960293225e-06, |
| "loss": 0.02, |
| "step": 25500 |
| }, |
| { |
| "epoch": 7.941356139279169, |
| "grad_norm": 0.007764923386275768, |
| "learning_rate": 4.117287721441662e-06, |
| "loss": 0.0242, |
| "step": 26000 |
| }, |
| { |
| "epoch": 8.094074526573, |
| "grad_norm": 0.04147506132721901, |
| "learning_rate": 3.8118509468540015e-06, |
| "loss": 0.0175, |
| "step": 26500 |
| }, |
| { |
| "epoch": 8.24679291386683, |
| "grad_norm": 41.88145446777344, |
| "learning_rate": 3.506414172266341e-06, |
| "loss": 0.0177, |
| "step": 27000 |
| }, |
| { |
| "epoch": 8.39951130116066, |
| "grad_norm": 0.005077702924609184, |
| "learning_rate": 3.2009773976786805e-06, |
| "loss": 0.019, |
| "step": 27500 |
| }, |
| { |
| "epoch": 8.55222968845449, |
| "grad_norm": 87.5268325805664, |
| "learning_rate": 2.8955406230910206e-06, |
| "loss": 0.013, |
| "step": 28000 |
| }, |
| { |
| "epoch": 8.70494807574832, |
| "grad_norm": 0.03963172435760498, |
| "learning_rate": 2.5901038485033603e-06, |
| "loss": 0.0152, |
| "step": 28500 |
| }, |
| { |
| "epoch": 8.85766646304215, |
| "grad_norm": 0.0064314561896026134, |
| "learning_rate": 2.2846670739156996e-06, |
| "loss": 0.0171, |
| "step": 29000 |
| }, |
| { |
| "epoch": 9.01038485033598, |
| "grad_norm": 0.0032072309404611588, |
| "learning_rate": 1.9792302993280393e-06, |
| "loss": 0.0161, |
| "step": 29500 |
| }, |
| { |
| "epoch": 9.16310323762981, |
| "grad_norm": 0.016286134719848633, |
| "learning_rate": 1.6737935247403788e-06, |
| "loss": 0.0117, |
| "step": 30000 |
| }, |
| { |
| "epoch": 9.315821624923641, |
| "grad_norm": 0.04490550234913826, |
| "learning_rate": 1.3683567501527185e-06, |
| "loss": 0.0122, |
| "step": 30500 |
| }, |
| { |
| "epoch": 9.46854001221747, |
| "grad_norm": 0.030366964638233185, |
| "learning_rate": 1.0629199755650582e-06, |
| "loss": 0.0076, |
| "step": 31000 |
| }, |
| { |
| "epoch": 9.621258399511301, |
| "grad_norm": 0.026300914585590363, |
| "learning_rate": 7.574832009773978e-07, |
| "loss": 0.0104, |
| "step": 31500 |
| }, |
| { |
| "epoch": 9.77397678680513, |
| "grad_norm": 0.0014681548345834017, |
| "learning_rate": 4.5204642638973736e-07, |
| "loss": 0.0107, |
| "step": 32000 |
| }, |
| { |
| "epoch": 9.926695174098962, |
| "grad_norm": 0.0018271030858159065, |
| "learning_rate": 1.4660965180207698e-07, |
| "loss": 0.0128, |
| "step": 32500 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 32740, |
| "total_flos": 7.032286628473344e+16, |
| "train_loss": 0.11521797929512122, |
| "train_runtime": 5803.3368, |
| "train_samples_per_second": 180.488, |
| "train_steps_per_second": 5.642 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 32740, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.032286628473344e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|