| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9944373127941804, |
| "eval_steps": 500, |
| "global_step": 3500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.08557980316645272, |
| "grad_norm": 2.8076744079589844, |
| "learning_rate": 1.9435414884516685e-05, |
| "loss": 0.2955077362060547, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.17115960633290545, |
| "grad_norm": 8.577657699584961, |
| "learning_rate": 1.8865126889078986e-05, |
| "loss": 0.07252912044525146, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.25673940949935814, |
| "grad_norm": 4.726356506347656, |
| "learning_rate": 1.829483889364129e-05, |
| "loss": 0.05829335689544678, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3423192126658109, |
| "grad_norm": 0.8415533900260925, |
| "learning_rate": 1.7724550898203593e-05, |
| "loss": 0.04327918529510498, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.4278990158322636, |
| "grad_norm": 6.089684963226318, |
| "learning_rate": 1.7154262902765898e-05, |
| "loss": 0.029802947044372557, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5134788189987163, |
| "grad_norm": 0.24030651152133942, |
| "learning_rate": 1.6583974907328203e-05, |
| "loss": 0.021494226455688478, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.599058622165169, |
| "grad_norm": 4.7198333740234375, |
| "learning_rate": 1.6013686911890508e-05, |
| "loss": 0.021204140186309815, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6846384253316218, |
| "grad_norm": 6.918349742889404, |
| "learning_rate": 1.544339891645281e-05, |
| "loss": 0.01345762848854065, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.7702182284980744, |
| "grad_norm": 7.747830390930176, |
| "learning_rate": 1.4873110921015115e-05, |
| "loss": 0.012243050336837768, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.8557980316645272, |
| "grad_norm": 0.10497640818357468, |
| "learning_rate": 1.4302822925577418e-05, |
| "loss": 0.01782155394554138, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.9413778348309799, |
| "grad_norm": 4.261937141418457, |
| "learning_rate": 1.3732534930139723e-05, |
| "loss": 0.006643967628479004, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.0265297389816004, |
| "grad_norm": 0.1339312493801117, |
| "learning_rate": 1.3162246934702026e-05, |
| "loss": 0.009871820211410523, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.1121095421480531, |
| "grad_norm": 0.09724577516317368, |
| "learning_rate": 1.2591958939264331e-05, |
| "loss": 0.003651106059551239, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.1976893453145059, |
| "grad_norm": 11.75054931640625, |
| "learning_rate": 1.2021670943826633e-05, |
| "loss": 0.004486417174339294, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.2832691484809584, |
| "grad_norm": 0.0023671553935855627, |
| "learning_rate": 1.1451382948388938e-05, |
| "loss": 0.003775770664215088, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.3688489516474112, |
| "grad_norm": 0.008763855323195457, |
| "learning_rate": 1.0881094952951241e-05, |
| "loss": 0.00964130938053131, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.454428754813864, |
| "grad_norm": 0.01788765750825405, |
| "learning_rate": 1.0310806957513546e-05, |
| "loss": 0.002719729840755463, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.5400085579803167, |
| "grad_norm": 0.07950153201818466, |
| "learning_rate": 9.74051896207585e-06, |
| "loss": 0.003652033805847168, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.6255883611467694, |
| "grad_norm": 0.002788349287584424, |
| "learning_rate": 9.170230966638153e-06, |
| "loss": 0.002641911804676056, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.711168164313222, |
| "grad_norm": 3.524432897567749, |
| "learning_rate": 8.599942971200456e-06, |
| "loss": 0.004275954961776733, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.796747967479675, |
| "grad_norm": 0.0015159114263951778, |
| "learning_rate": 8.029654975762761e-06, |
| "loss": 0.0022863298654556274, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.8823277706461274, |
| "grad_norm": 0.0021803653798997402, |
| "learning_rate": 7.459366980325064e-06, |
| "loss": 0.004672373831272125, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.9679075738125802, |
| "grad_norm": 0.06295743584632874, |
| "learning_rate": 6.8890789848873685e-06, |
| "loss": 0.0033081093430519103, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.0530594779632008, |
| "grad_norm": 0.007334406953305006, |
| "learning_rate": 6.318790989449673e-06, |
| "loss": 0.0029252803325653075, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.1386392811296533, |
| "grad_norm": 0.007601665332913399, |
| "learning_rate": 5.748502994011976e-06, |
| "loss": 0.001956077367067337, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.2242190842961063, |
| "grad_norm": 0.0029085788410156965, |
| "learning_rate": 5.17821499857428e-06, |
| "loss": 0.0015319937467575073, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.309798887462559, |
| "grad_norm": 0.02407892607152462, |
| "learning_rate": 4.607927003136584e-06, |
| "loss": 0.0015379299223423004, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.3953786906290118, |
| "grad_norm": 0.0027564691845327616, |
| "learning_rate": 4.037639007698888e-06, |
| "loss": 0.0029152187705039976, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.4809584937954643, |
| "grad_norm": 0.0022422655019909143, |
| "learning_rate": 3.467351012261192e-06, |
| "loss": 0.0007979054003953934, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.566538296961917, |
| "grad_norm": 0.07274006307125092, |
| "learning_rate": 2.897063016823496e-06, |
| "loss": 0.0014514121413230895, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.65211810012837, |
| "grad_norm": 0.005161191802471876, |
| "learning_rate": 2.3267750213858e-06, |
| "loss": 0.003334296643733978, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.7376979032948223, |
| "grad_norm": 0.004177470691502094, |
| "learning_rate": 1.756487025948104e-06, |
| "loss": 0.0015054044127464294, |
| "step": 3200 |
| }, |
| { |
| "epoch": 2.8232777064612753, |
| "grad_norm": 0.0032875759061425924, |
| "learning_rate": 1.186199030510408e-06, |
| "loss": 0.0003650055080652237, |
| "step": 3300 |
| }, |
| { |
| "epoch": 2.908857509627728, |
| "grad_norm": 0.00569865433499217, |
| "learning_rate": 6.159110350727118e-07, |
| "loss": 0.0007427183538675308, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.9944373127941804, |
| "grad_norm": 0.0013147740392014384, |
| "learning_rate": 4.562303963501569e-08, |
| "loss": 0.0005363801494240761, |
| "step": 3500 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 3507, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.836818305327232e+16, |
| "train_batch_size": 96, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|