| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.03076804738279297, | |
| "eval_steps": 500, | |
| "global_step": 200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0015384023691396484, | |
| "grad_norm": 1.1134390830993652, | |
| "learning_rate": 0.00019487179487179487, | |
| "loss": 1.9095, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.003076804738279297, | |
| "grad_norm": 0.6254091858863831, | |
| "learning_rate": 0.00018461538461538463, | |
| "loss": 1.2605, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.004615207107418946, | |
| "grad_norm": 0.7272558808326721, | |
| "learning_rate": 0.00017435897435897436, | |
| "loss": 1.1399, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.006153609476558594, | |
| "grad_norm": 0.49501851201057434, | |
| "learning_rate": 0.0001641025641025641, | |
| "loss": 1.0796, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.007692011845698243, | |
| "grad_norm": 0.6586689352989197, | |
| "learning_rate": 0.00015384615384615385, | |
| "loss": 1.1476, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.009230414214837892, | |
| "grad_norm": 0.624894917011261, | |
| "learning_rate": 0.0001435897435897436, | |
| "loss": 1.0346, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.010768816583977539, | |
| "grad_norm": 0.4778560400009155, | |
| "learning_rate": 0.00013333333333333334, | |
| "loss": 1.0532, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.012307218953117188, | |
| "grad_norm": 0.5783376097679138, | |
| "learning_rate": 0.0001230769230769231, | |
| "loss": 1.0024, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.013845621322256836, | |
| "grad_norm": 0.544222891330719, | |
| "learning_rate": 0.00011282051282051283, | |
| "loss": 1.0234, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.015384023691396485, | |
| "grad_norm": 0.5201743245124817, | |
| "learning_rate": 0.00010256410256410256, | |
| "loss": 1.0451, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.016922426060536132, | |
| "grad_norm": 0.5806599259376526, | |
| "learning_rate": 9.230769230769232e-05, | |
| "loss": 1.0226, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.018460828429675783, | |
| "grad_norm": 0.5782334208488464, | |
| "learning_rate": 8.205128205128205e-05, | |
| "loss": 1.0224, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.01999923079881543, | |
| "grad_norm": 0.5596034526824951, | |
| "learning_rate": 7.17948717948718e-05, | |
| "loss": 1.0013, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.021537633167955077, | |
| "grad_norm": 0.5834138989448547, | |
| "learning_rate": 6.153846153846155e-05, | |
| "loss": 1.0763, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.023076035537094728, | |
| "grad_norm": 0.45252805948257446, | |
| "learning_rate": 5.128205128205128e-05, | |
| "loss": 1.0192, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.024614437906234375, | |
| "grad_norm": 0.5366165637969971, | |
| "learning_rate": 4.1025641025641023e-05, | |
| "loss": 0.962, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.026152840275374026, | |
| "grad_norm": 0.5441515445709229, | |
| "learning_rate": 3.0769230769230774e-05, | |
| "loss": 1.0803, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.027691242644513673, | |
| "grad_norm": 0.6809340119361877, | |
| "learning_rate": 2.0512820512820512e-05, | |
| "loss": 1.0303, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.02922964501365332, | |
| "grad_norm": 0.45926156640052795, | |
| "learning_rate": 1.0256410256410256e-05, | |
| "loss": 1.0258, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.03076804738279297, | |
| "grad_norm": 0.4480159878730774, | |
| "learning_rate": 0.0, | |
| "loss": 1.0755, | |
| "step": 200 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.6066405922193408e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |