{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 3770,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.13262599469496023,
      "grad_norm": 5.429837703704834,
      "learning_rate": 4.967957276368492e-05,
      "loss": 2.3201,
      "step": 100
    },
    {
      "epoch": 0.26525198938992045,
      "grad_norm": 6.075057029724121,
      "learning_rate": 4.901869158878505e-05,
      "loss": 1.9594,
      "step": 200
    },
    {
      "epoch": 0.3978779840848806,
      "grad_norm": 3.753885507583618,
      "learning_rate": 4.835113484646196e-05,
      "loss": 1.9226,
      "step": 300
    },
    {
      "epoch": 0.5305039787798409,
      "grad_norm": 5.202290058135986,
      "learning_rate": 4.768357810413886e-05,
      "loss": 1.8727,
      "step": 400
    },
    {
      "epoch": 0.6631299734748011,
      "grad_norm": 4.6010637283325195,
      "learning_rate": 4.701602136181576e-05,
      "loss": 1.815,
      "step": 500
    },
    {
      "epoch": 0.7957559681697612,
      "grad_norm": 3.4173789024353027,
      "learning_rate": 4.634846461949266e-05,
      "loss": 1.828,
      "step": 600
    },
    {
      "epoch": 0.9283819628647215,
      "grad_norm": 8.333471298217773,
      "learning_rate": 4.568090787716956e-05,
      "loss": 1.8053,
      "step": 700
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 128.048,
      "eval_loss": 1.6762282848358154,
      "eval_rouge1": 40.7296,
      "eval_rouge2": 14.8309,
      "eval_rougeL": 26.5174,
      "eval_rougeLsum": 37.7044,
      "eval_runtime": 609.5513,
      "eval_samples_per_second": 1.641,
      "eval_steps_per_second": 0.205,
      "step": 754
    },
    {
      "epoch": 1.0610079575596818,
      "grad_norm": 2.8465678691864014,
      "learning_rate": 4.5013351134846464e-05,
      "loss": 1.7625,
      "step": 800
    },
    {
      "epoch": 1.193633952254642,
      "grad_norm": 3.454566717147827,
      "learning_rate": 4.4345794392523364e-05,
      "loss": 1.7285,
      "step": 900
    },
    {
      "epoch": 1.3262599469496021,
      "grad_norm": 24.434019088745117,
      "learning_rate": 4.367823765020027e-05,
      "loss": 1.6906,
      "step": 1000
    },
    {
      "epoch": 1.4588859416445623,
      "grad_norm": 2.913654327392578,
      "learning_rate": 4.301068090787717e-05,
      "loss": 1.7003,
      "step": 1100
    },
    {
      "epoch": 1.5915119363395225,
      "grad_norm": 3.6399612426757812,
      "learning_rate": 4.234312416555408e-05,
      "loss": 1.7144,
      "step": 1200
    },
    {
      "epoch": 1.7241379310344827,
      "grad_norm": 3.5709495544433594,
      "learning_rate": 4.167556742323098e-05,
      "loss": 1.6836,
      "step": 1300
    },
    {
      "epoch": 1.8567639257294428,
      "grad_norm": 12.881651878356934,
      "learning_rate": 4.100801068090788e-05,
      "loss": 1.7071,
      "step": 1400
    },
    {
      "epoch": 1.9893899204244032,
      "grad_norm": 7.4129133224487305,
      "learning_rate": 4.0340453938584784e-05,
      "loss": 1.6916,
      "step": 1500
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 127.846,
      "eval_loss": 1.6203532218933105,
      "eval_rouge1": 41.7668,
      "eval_rouge2": 15.5459,
      "eval_rougeL": 26.7721,
      "eval_rougeLsum": 38.8375,
      "eval_runtime": 602.6964,
      "eval_samples_per_second": 1.659,
      "eval_steps_per_second": 0.207,
      "step": 1508
    },
    {
      "epoch": 2.1220159151193636,
      "grad_norm": 2.7983736991882324,
      "learning_rate": 3.9679572763684914e-05,
      "loss": 1.6306,
      "step": 1600
    },
    {
      "epoch": 2.2546419098143238,
      "grad_norm": 3.959669589996338,
      "learning_rate": 3.901201602136182e-05,
      "loss": 1.6281,
      "step": 1700
    },
    {
      "epoch": 2.387267904509284,
      "grad_norm": 6.940447807312012,
      "learning_rate": 3.835113484646195e-05,
      "loss": 1.6283,
      "step": 1800
    },
    {
      "epoch": 2.519893899204244,
      "grad_norm": 3.5200612545013428,
      "learning_rate": 3.768357810413886e-05,
      "loss": 1.5985,
      "step": 1900
    },
    {
      "epoch": 2.6525198938992043,
      "grad_norm": 4.220067024230957,
      "learning_rate": 3.701602136181576e-05,
      "loss": 1.5914,
      "step": 2000
    },
    {
      "epoch": 2.7851458885941645,
      "grad_norm": 3.168060302734375,
      "learning_rate": 3.636181575433912e-05,
      "loss": 1.6099,
      "step": 2100
    },
    {
      "epoch": 2.9177718832891246,
      "grad_norm": 4.414212226867676,
      "learning_rate": 3.5700934579439256e-05,
      "loss": 1.6376,
      "step": 2200
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 127.821,
      "eval_loss": 1.5945690870285034,
      "eval_rouge1": 42.6661,
      "eval_rouge2": 16.1955,
      "eval_rougeL": 27.2585,
      "eval_rougeLsum": 39.7771,
      "eval_runtime": 599.573,
      "eval_samples_per_second": 1.668,
      "eval_steps_per_second": 0.208,
      "step": 2262
    },
    {
      "epoch": 3.050397877984085,
      "grad_norm": 3.4827208518981934,
      "learning_rate": 3.5033377837116156e-05,
      "loss": 1.6044,
      "step": 2300
    },
    {
      "epoch": 3.183023872679045,
      "grad_norm": 4.390041351318359,
      "learning_rate": 3.436582109479306e-05,
      "loss": 1.5388,
      "step": 2400
    },
    {
      "epoch": 3.315649867374005,
      "grad_norm": 2.9073328971862793,
      "learning_rate": 3.369826435246996e-05,
      "loss": 1.5563,
      "step": 2500
    },
    {
      "epoch": 3.4482758620689653,
      "grad_norm": 3.0479063987731934,
      "learning_rate": 3.303070761014686e-05,
      "loss": 1.5731,
      "step": 2600
    },
    {
      "epoch": 3.5809018567639255,
      "grad_norm": 2.9282639026641846,
      "learning_rate": 3.236315086782377e-05,
      "loss": 1.5433,
      "step": 2700
    },
    {
      "epoch": 3.713527851458886,
      "grad_norm": 4.056313514709473,
      "learning_rate": 3.169559412550067e-05,
      "loss": 1.5509,
      "step": 2800
    },
    {
      "epoch": 3.8461538461538463,
      "grad_norm": 2.7992489337921143,
      "learning_rate": 3.102803738317757e-05,
      "loss": 1.5407,
      "step": 2900
    },
    {
      "epoch": 3.9787798408488064,
      "grad_norm": 4.021698474884033,
      "learning_rate": 3.0360480640854473e-05,
      "loss": 1.5609,
      "step": 3000
    },
    {
      "epoch": 4.0,
      "eval_gen_len": 127.946,
      "eval_loss": 1.5801172256469727,
      "eval_rouge1": 42.7385,
      "eval_rouge2": 16.2447,
      "eval_rougeL": 27.2408,
      "eval_rougeLsum": 39.7737,
      "eval_runtime": 599.4838,
      "eval_samples_per_second": 1.668,
      "eval_steps_per_second": 0.209,
      "step": 3016
    },
    {
      "epoch": 4.111405835543766,
      "grad_norm": 4.38268518447876,
      "learning_rate": 2.9692923898531376e-05,
      "loss": 1.4975,
      "step": 3100
    },
    {
      "epoch": 4.244031830238727,
      "grad_norm": 3.2881453037261963,
      "learning_rate": 2.9025367156208276e-05,
      "loss": 1.5086,
      "step": 3200
    },
    {
      "epoch": 4.376657824933687,
      "grad_norm": 14.788896560668945,
      "learning_rate": 2.8357810413885183e-05,
      "loss": 1.5175,
      "step": 3300
    },
    {
      "epoch": 4.5092838196286475,
      "grad_norm": 3.1153130531311035,
      "learning_rate": 2.7690253671562083e-05,
      "loss": 1.498,
      "step": 3400
    },
    {
      "epoch": 4.641909814323608,
      "grad_norm": 5.817938804626465,
      "learning_rate": 2.702269692923899e-05,
      "loss": 1.4979,
      "step": 3500
    },
    {
      "epoch": 4.774535809018568,
      "grad_norm": 7.6480584144592285,
      "learning_rate": 2.635514018691589e-05,
      "loss": 1.507,
      "step": 3600
    },
    {
      "epoch": 4.907161803713528,
      "grad_norm": 4.517747402191162,
      "learning_rate": 2.5687583444592793e-05,
      "loss": 1.5067,
      "step": 3700
    },
    {
      "epoch": 5.0,
      "eval_gen_len": 127.887,
      "eval_loss": 1.569287896156311,
      "eval_rouge1": 43.3524,
      "eval_rouge2": 16.6388,
      "eval_rougeL": 27.57,
      "eval_rougeLsum": 40.4783,
      "eval_runtime": 598.5089,
      "eval_samples_per_second": 1.671,
      "eval_steps_per_second": 0.209,
      "step": 3770
    }
  ],
  "logging_steps": 100,
  "max_steps": 7540,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 8.708845744422912e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}