| { |
| "best_metric": 2.35801100730896, |
| "best_model_checkpoint": "miner_id_24/checkpoint-300", |
| "epoch": 0.13652809065465218, |
| "eval_steps": 100, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0001365280906546522, |
| "eval_loss": 3.0683064460754395, |
| "eval_runtime": 176.1658, |
| "eval_samples_per_second": 35.012, |
| "eval_steps_per_second": 8.753, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00682640453273261, |
| "grad_norm": 47.15786361694336, |
| "learning_rate": 0.00025, |
| "loss": 8.7525, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01365280906546522, |
| "grad_norm": 86.82730102539062, |
| "learning_rate": 0.00025, |
| "loss": 8.94, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.01365280906546522, |
| "eval_loss": 2.436293601989746, |
| "eval_runtime": 176.1858, |
| "eval_samples_per_second": 35.009, |
| "eval_steps_per_second": 8.752, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.020479213598197828, |
| "grad_norm": 88.40423583984375, |
| "learning_rate": 0.00025, |
| "loss": 8.5322, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.02730561813093044, |
| "grad_norm": 85.16395568847656, |
| "learning_rate": 0.00025, |
| "loss": 9.0278, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.02730561813093044, |
| "eval_loss": 2.371351718902588, |
| "eval_runtime": 176.2967, |
| "eval_samples_per_second": 34.986, |
| "eval_steps_per_second": 8.747, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.034132022663663046, |
| "grad_norm": 63.54459762573242, |
| "learning_rate": 0.00025, |
| "loss": 8.8962, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.040958427196395655, |
| "grad_norm": 59.64451599121094, |
| "learning_rate": 0.00025, |
| "loss": 8.8704, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.040958427196395655, |
| "eval_loss": 2.35801100730896, |
| "eval_runtime": 176.4245, |
| "eval_samples_per_second": 34.961, |
| "eval_steps_per_second": 8.74, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.04778483172912827, |
| "grad_norm": 50.603660583496094, |
| "learning_rate": 0.00025, |
| "loss": 8.7621, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.05461123626186088, |
| "grad_norm": 74.71147155761719, |
| "learning_rate": 0.00025, |
| "loss": 8.8523, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.05461123626186088, |
| "eval_loss": 2.363450765609741, |
| "eval_runtime": 176.6248, |
| "eval_samples_per_second": 34.921, |
| "eval_steps_per_second": 8.73, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.06143764079459349, |
| "grad_norm": 78.24207305908203, |
| "learning_rate": 0.00025, |
| "loss": 8.8188, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.06826404532732609, |
| "grad_norm": 52.44586944580078, |
| "learning_rate": 0.00025, |
| "loss": 8.8018, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.06826404532732609, |
| "eval_loss": 2.367098093032837, |
| "eval_runtime": 176.6146, |
| "eval_samples_per_second": 34.924, |
| "eval_steps_per_second": 8.731, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.07509044986005871, |
| "grad_norm": 102.29866027832031, |
| "learning_rate": 0.00025, |
| "loss": 8.8246, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.08191685439279131, |
| "grad_norm": 86.25479125976562, |
| "learning_rate": 0.00025, |
| "loss": 9.1508, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.08191685439279131, |
| "eval_loss": 2.4814395904541016, |
| "eval_runtime": 176.6356, |
| "eval_samples_per_second": 34.919, |
| "eval_steps_per_second": 8.73, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.08874325892552393, |
| "grad_norm": 74.26067352294922, |
| "learning_rate": 0.00025, |
| "loss": 9.1596, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.09556966345825654, |
| "grad_norm": 75.83834075927734, |
| "learning_rate": 0.00025, |
| "loss": 9.2521, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.09556966345825654, |
| "eval_loss": 2.4691531658172607, |
| "eval_runtime": 176.2943, |
| "eval_samples_per_second": 34.987, |
| "eval_steps_per_second": 8.747, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.10239606799098915, |
| "grad_norm": 99.25703430175781, |
| "learning_rate": 0.00025, |
| "loss": 8.9744, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.10922247252372176, |
| "grad_norm": 56.16615676879883, |
| "learning_rate": 0.00025, |
| "loss": 9.1914, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.10922247252372176, |
| "eval_loss": 2.4960551261901855, |
| "eval_runtime": 176.3333, |
| "eval_samples_per_second": 34.979, |
| "eval_steps_per_second": 8.745, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.11604887705645436, |
| "grad_norm": 100.18714141845703, |
| "learning_rate": 0.00025, |
| "loss": 9.0877, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.12287528158918698, |
| "grad_norm": 110.27277374267578, |
| "learning_rate": 0.00025, |
| "loss": 9.0612, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.12287528158918698, |
| "eval_loss": 2.5282108783721924, |
| "eval_runtime": 176.2988, |
| "eval_samples_per_second": 34.986, |
| "eval_steps_per_second": 8.747, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.1297016861219196, |
| "grad_norm": 180.63604736328125, |
| "learning_rate": 0.00025, |
| "loss": 9.1691, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.13652809065465218, |
| "grad_norm": 113.0001449584961, |
| "learning_rate": 0.00025, |
| "loss": 9.0877, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.13652809065465218, |
| "eval_loss": 2.445781707763672, |
| "eval_runtime": 176.6879, |
| "eval_samples_per_second": 34.909, |
| "eval_steps_per_second": 8.727, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 1000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.53626333642752e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|