{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.11121408711770157,
  "eval_steps": 1000,
  "global_step": 30,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0037071362372567192,
      "grad_norm": 4.805793835959296,
      "learning_rate": 0.0,
      "loss": 0.4732,
      "step": 1
    },
    {
      "epoch": 0.0074142724745134385,
      "grad_norm": 4.999676761082255,
      "learning_rate": 1.8518518518518518e-07,
      "loss": 0.5107,
      "step": 2
    },
    {
      "epoch": 0.011121408711770158,
      "grad_norm": 4.864588159866656,
      "learning_rate": 3.7037037037037036e-07,
      "loss": 0.4708,
      "step": 3
    },
    {
      "epoch": 0.014828544949026877,
      "grad_norm": 5.136331269447859,
      "learning_rate": 5.555555555555555e-07,
      "loss": 0.5024,
      "step": 4
    },
    {
      "epoch": 0.018535681186283594,
      "grad_norm": 4.731519955515801,
      "learning_rate": 7.407407407407407e-07,
      "loss": 0.4598,
      "step": 5
    },
    {
      "epoch": 0.022242817423540315,
      "grad_norm": 4.567784280784228,
      "learning_rate": 9.259259259259259e-07,
      "loss": 0.4717,
      "step": 6
    },
    {
      "epoch": 0.025949953660797033,
      "grad_norm": 4.522993398842187,
      "learning_rate": 1.111111111111111e-06,
      "loss": 0.4649,
      "step": 7
    },
    {
      "epoch": 0.029657089898053754,
      "grad_norm": 2.864282407749261,
      "learning_rate": 1.2962962962962962e-06,
      "loss": 0.4499,
      "step": 8
    },
    {
      "epoch": 0.033364226135310475,
      "grad_norm": 2.781272716749165,
      "learning_rate": 1.4814814814814815e-06,
      "loss": 0.4952,
      "step": 9
    },
    {
      "epoch": 0.03707136237256719,
      "grad_norm": 2.546317122615437,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.4487,
      "step": 10
    },
    {
      "epoch": 0.04077849860982391,
      "grad_norm": 2.700137363750789,
      "learning_rate": 1.8518518518518519e-06,
      "loss": 0.4335,
      "step": 11
    },
    {
      "epoch": 0.04448563484708063,
      "grad_norm": 2.7541576896447233,
      "learning_rate": 2.037037037037037e-06,
      "loss": 0.4371,
      "step": 12
    },
    {
      "epoch": 0.04819277108433735,
      "grad_norm": 2.8211373607312447,
      "learning_rate": 2.222222222222222e-06,
      "loss": 0.4374,
      "step": 13
    },
    {
      "epoch": 0.051899907321594066,
      "grad_norm": 2.762988661288179,
      "learning_rate": 2.4074074074074075e-06,
      "loss": 0.4609,
      "step": 14
    },
    {
      "epoch": 0.05560704355885079,
      "grad_norm": 2.0744764582695923,
      "learning_rate": 2.5925925925925925e-06,
      "loss": 0.3954,
      "step": 15
    },
    {
      "epoch": 0.05931417979610751,
      "grad_norm": 2.0106943337826277,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 0.3589,
      "step": 16
    },
    {
      "epoch": 0.06302131603336422,
      "grad_norm": 1.9151145546731518,
      "learning_rate": 2.962962962962963e-06,
      "loss": 0.374,
      "step": 17
    },
    {
      "epoch": 0.06672845227062095,
      "grad_norm": 1.7742581344696668,
      "learning_rate": 3.1481481481481483e-06,
      "loss": 0.4013,
      "step": 18
    },
    {
      "epoch": 0.07043558850787766,
      "grad_norm": 1.5873818678054419,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.3847,
      "step": 19
    },
    {
      "epoch": 0.07414272474513438,
      "grad_norm": 1.1782654410938447,
      "learning_rate": 3.5185185185185187e-06,
      "loss": 0.3392,
      "step": 20
    },
    {
      "epoch": 0.0778498609823911,
      "grad_norm": 1.2100197827995287,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 0.3174,
      "step": 21
    },
    {
      "epoch": 0.08155699721964782,
      "grad_norm": 1.3105688995199969,
      "learning_rate": 3.88888888888889e-06,
      "loss": 0.3774,
      "step": 22
    },
    {
      "epoch": 0.08526413345690455,
      "grad_norm": 1.0431241378849054,
      "learning_rate": 4.074074074074074e-06,
      "loss": 0.3329,
      "step": 23
    },
    {
      "epoch": 0.08897126969416126,
      "grad_norm": 0.8631707844136063,
      "learning_rate": 4.2592592592592596e-06,
      "loss": 0.2954,
      "step": 24
    },
    {
      "epoch": 0.09267840593141798,
      "grad_norm": 0.890999961539687,
      "learning_rate": 4.444444444444444e-06,
      "loss": 0.3143,
      "step": 25
    },
    {
      "epoch": 0.0963855421686747,
      "grad_norm": 0.8696087553408013,
      "learning_rate": 4.62962962962963e-06,
      "loss": 0.3012,
      "step": 26
    },
    {
      "epoch": 0.10009267840593142,
      "grad_norm": 0.9182891405006179,
      "learning_rate": 4.814814814814815e-06,
      "loss": 0.2918,
      "step": 27
    },
    {
      "epoch": 0.10379981464318813,
      "grad_norm": 0.9323649589307044,
      "learning_rate": 5e-06,
      "loss": 0.2953,
      "step": 28
    },
    {
      "epoch": 0.10750695088044486,
      "grad_norm": 0.812755436108598,
      "learning_rate": 5.185185185185185e-06,
      "loss": 0.3003,
      "step": 29
    },
    {
      "epoch": 0.11121408711770157,
      "grad_norm": 0.7059616195446038,
      "learning_rate": 5.370370370370371e-06,
      "loss": 0.2843,
      "step": 30
    }
  ],
  "logging_steps": 1,
  "max_steps": 538,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 8820667908096.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}