| { | |
| "best_metric": 0.2750368118286133, | |
| "best_model_checkpoint": "./ryan_model314_3/checkpoint-550", | |
| "epoch": 0.88, | |
| "eval_steps": 50, | |
| "global_step": 550, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 1.5017586946487427, | |
| "learning_rate": 0.000192, | |
| "loss": 0.4423, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 0.33861014246940613, | |
| "eval_na_accuracy": 0.904, | |
| "eval_ordinal_accuracy": 0.4629418472063854, | |
| "eval_ordinal_mae": 0.6577621472191316, | |
| "eval_runtime": 123.3898, | |
| "eval_samples_per_second": 8.104, | |
| "eval_steps_per_second": 1.013, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.8501819372177124, | |
| "learning_rate": 0.00018400000000000003, | |
| "loss": 0.3088, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_loss": 0.3268783390522003, | |
| "eval_na_accuracy": 0.928, | |
| "eval_ordinal_accuracy": 0.5370581527936146, | |
| "eval_ordinal_mae": 0.5969413880658287, | |
| "eval_runtime": 43.4997, | |
| "eval_samples_per_second": 22.989, | |
| "eval_steps_per_second": 2.874, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.5424334406852722, | |
| "learning_rate": 0.00017600000000000002, | |
| "loss": 0.316, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_loss": 0.3395713269710541, | |
| "eval_na_accuracy": 0.902, | |
| "eval_ordinal_accuracy": 0.5142531356898518, | |
| "eval_ordinal_mae": 0.6323422620227872, | |
| "eval_runtime": 43.339, | |
| "eval_samples_per_second": 23.074, | |
| "eval_steps_per_second": 2.884, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 1.2484453916549683, | |
| "learning_rate": 0.000168, | |
| "loss": 0.2821, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_loss": 0.32339948415756226, | |
| "eval_na_accuracy": 0.927, | |
| "eval_ordinal_accuracy": 0.5131128848346637, | |
| "eval_ordinal_mae": 0.6292874569299393, | |
| "eval_runtime": 42.0004, | |
| "eval_samples_per_second": 23.809, | |
| "eval_steps_per_second": 2.976, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.4807660579681396, | |
| "learning_rate": 0.00016, | |
| "loss": 0.2731, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 0.3313509225845337, | |
| "eval_na_accuracy": 0.925, | |
| "eval_ordinal_accuracy": 0.508551881413911, | |
| "eval_ordinal_mae": 0.5856009521101041, | |
| "eval_runtime": 55.6564, | |
| "eval_samples_per_second": 17.967, | |
| "eval_steps_per_second": 2.246, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.4179209470748901, | |
| "learning_rate": 0.000152, | |
| "loss": 0.2975, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_loss": 0.3036611080169678, | |
| "eval_na_accuracy": 0.927, | |
| "eval_ordinal_accuracy": 0.5963511972633979, | |
| "eval_ordinal_mae": 0.5690023564742932, | |
| "eval_runtime": 42.7034, | |
| "eval_samples_per_second": 23.417, | |
| "eval_steps_per_second": 2.927, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.7659221887588501, | |
| "learning_rate": 0.000144, | |
| "loss": 0.2609, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_loss": 0.3209022283554077, | |
| "eval_na_accuracy": 0.928, | |
| "eval_ordinal_accuracy": 0.5450399087799316, | |
| "eval_ordinal_mae": 0.5764862077817825, | |
| "eval_runtime": 43.1206, | |
| "eval_samples_per_second": 23.191, | |
| "eval_steps_per_second": 2.899, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.0847711563110352, | |
| "learning_rate": 0.00013600000000000003, | |
| "loss": 0.287, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_loss": 0.29075464606285095, | |
| "eval_na_accuracy": 0.931, | |
| "eval_ordinal_accuracy": 0.5826681870011402, | |
| "eval_ordinal_mae": 0.5458187616535902, | |
| "eval_runtime": 42.3269, | |
| "eval_samples_per_second": 23.626, | |
| "eval_steps_per_second": 2.953, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.9720218181610107, | |
| "learning_rate": 0.00012800000000000002, | |
| "loss": 0.2905, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_loss": 0.30074238777160645, | |
| "eval_na_accuracy": 0.919, | |
| "eval_ordinal_accuracy": 0.5986316989737742, | |
| "eval_ordinal_mae": 0.548372159519042, | |
| "eval_runtime": 76.7524, | |
| "eval_samples_per_second": 13.029, | |
| "eval_steps_per_second": 1.629, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.8414099216461182, | |
| "learning_rate": 0.00012, | |
| "loss": 0.2574, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 0.28344637155532837, | |
| "eval_na_accuracy": 0.929, | |
| "eval_ordinal_accuracy": 0.6031927023945268, | |
| "eval_ordinal_mae": 0.5363022306512, | |
| "eval_runtime": 42.8484, | |
| "eval_samples_per_second": 23.338, | |
| "eval_steps_per_second": 2.917, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.5895617604255676, | |
| "learning_rate": 0.00011200000000000001, | |
| "loss": 0.2855, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_loss": 0.2750368118286133, | |
| "eval_na_accuracy": 0.931, | |
| "eval_ordinal_accuracy": 0.6271379703534777, | |
| "eval_ordinal_mae": 0.5319093595330124, | |
| "eval_runtime": 42.3171, | |
| "eval_samples_per_second": 23.631, | |
| "eval_steps_per_second": 2.954, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "step": 550, | |
| "total_flos": 6.81953956282368e+17, | |
| "train_loss": 0.3000895881652832, | |
| "train_runtime": 2172.3633, | |
| "train_samples_per_second": 9.207, | |
| "train_steps_per_second": 0.575 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 1250, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 50, | |
| "total_flos": 6.81953956282368e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |