| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 1000, |
| "global_step": 19048, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02624947501049979, |
| "grad_norm": 1.2762126922607422, |
| "learning_rate": 4.868752624947501e-05, |
| "loss": 4.3755, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05249895002099958, |
| "grad_norm": 2.1579689979553223, |
| "learning_rate": 4.7375052498950025e-05, |
| "loss": 2.9041, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.05249895002099958, |
| "eval_accuracy": 0.4456327704194102, |
| "eval_loss": 2.4551732540130615, |
| "eval_runtime": 52.3921, |
| "eval_samples_per_second": 116.945, |
| "eval_steps_per_second": 3.665, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.07874842503149937, |
| "grad_norm": 2.8109207153320312, |
| "learning_rate": 4.606257874842503e-05, |
| "loss": 2.2866, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.10499790004199916, |
| "grad_norm": 1.555198073387146, |
| "learning_rate": 4.475010499790005e-05, |
| "loss": 1.975, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.10499790004199916, |
| "eval_accuracy": 0.566995340241206, |
| "eval_loss": 1.800054907798767, |
| "eval_runtime": 52.4411, |
| "eval_samples_per_second": 116.836, |
| "eval_steps_per_second": 3.661, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.13124737505249895, |
| "grad_norm": 1.6156566143035889, |
| "learning_rate": 4.3437631247375055e-05, |
| "loss": 1.8239, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.15749685006299874, |
| "grad_norm": 1.3470197916030884, |
| "learning_rate": 4.212515749685006e-05, |
| "loss": 1.7304, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.15749685006299874, |
| "eval_accuracy": 0.6007325235911557, |
| "eval_loss": 1.610120177268982, |
| "eval_runtime": 52.0749, |
| "eval_samples_per_second": 117.657, |
| "eval_steps_per_second": 3.687, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.18374632507349853, |
| "grad_norm": 1.3028109073638916, |
| "learning_rate": 4.081268374632508e-05, |
| "loss": 1.6586, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.20999580008399832, |
| "grad_norm": 1.4646774530410767, |
| "learning_rate": 3.9500209995800084e-05, |
| "loss": 1.6082, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.20999580008399832, |
| "eval_accuracy": 0.6184141440200028, |
| "eval_loss": 1.5135186910629272, |
| "eval_runtime": 52.2531, |
| "eval_samples_per_second": 117.256, |
| "eval_steps_per_second": 3.674, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.2362452750944981, |
| "grad_norm": 1.2850710153579712, |
| "learning_rate": 3.81877362452751e-05, |
| "loss": 1.5687, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.2624947501049979, |
| "grad_norm": 1.3608484268188477, |
| "learning_rate": 3.687526249475011e-05, |
| "loss": 1.5347, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.2624947501049979, |
| "eval_accuracy": 0.6303795149938871, |
| "eval_loss": 1.4518604278564453, |
| "eval_runtime": 52.3842, |
| "eval_samples_per_second": 116.963, |
| "eval_steps_per_second": 3.665, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.2887442251154977, |
| "grad_norm": 1.2262423038482666, |
| "learning_rate": 3.5562788744225114e-05, |
| "loss": 1.5052, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.3149937001259975, |
| "grad_norm": 1.1985517740249634, |
| "learning_rate": 3.425031499370013e-05, |
| "loss": 1.481, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.3149937001259975, |
| "eval_accuracy": 0.6388963740927813, |
| "eval_loss": 1.406346082687378, |
| "eval_runtime": 52.3896, |
| "eval_samples_per_second": 116.951, |
| "eval_steps_per_second": 3.665, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.34124317513649727, |
| "grad_norm": 1.2099275588989258, |
| "learning_rate": 3.2937841243175137e-05, |
| "loss": 1.4638, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.36749265014699706, |
| "grad_norm": 1.30404794216156, |
| "learning_rate": 3.162536749265015e-05, |
| "loss": 1.4437, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.36749265014699706, |
| "eval_accuracy": 0.6458597675369553, |
| "eval_loss": 1.3707749843597412, |
| "eval_runtime": 51.9205, |
| "eval_samples_per_second": 118.007, |
| "eval_steps_per_second": 3.698, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.39374212515749685, |
| "grad_norm": 1.2142497301101685, |
| "learning_rate": 3.031289374212516e-05, |
| "loss": 1.4271, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.41999160016799664, |
| "grad_norm": 1.209383487701416, |
| "learning_rate": 2.900041999160017e-05, |
| "loss": 1.4146, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.41999160016799664, |
| "eval_accuracy": 0.6510902099755246, |
| "eval_loss": 1.342362880706787, |
| "eval_runtime": 52.0734, |
| "eval_samples_per_second": 117.661, |
| "eval_steps_per_second": 3.687, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.4462410751784964, |
| "grad_norm": 1.0856986045837402, |
| "learning_rate": 2.768794624107518e-05, |
| "loss": 1.399, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.4724905501889962, |
| "grad_norm": 1.1674331426620483, |
| "learning_rate": 2.6375472490550192e-05, |
| "loss": 1.3851, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.4724905501889962, |
| "eval_accuracy": 0.6559099899312707, |
| "eval_loss": 1.3188802003860474, |
| "eval_runtime": 52.2608, |
| "eval_samples_per_second": 117.239, |
| "eval_steps_per_second": 3.674, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.498740025199496, |
| "grad_norm": 1.103853464126587, |
| "learning_rate": 2.50629987400252e-05, |
| "loss": 1.3755, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.5249895002099958, |
| "grad_norm": 1.1234028339385986, |
| "learning_rate": 2.375052498950021e-05, |
| "loss": 1.3649, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.5249895002099958, |
| "eval_accuracy": 0.659545964283851, |
| "eval_loss": 1.2999330759048462, |
| "eval_runtime": 52.336, |
| "eval_samples_per_second": 117.07, |
| "eval_steps_per_second": 3.669, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.5512389752204956, |
| "grad_norm": 1.092689037322998, |
| "learning_rate": 2.2438051238975222e-05, |
| "loss": 1.3586, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.5774884502309954, |
| "grad_norm": 1.1136916875839233, |
| "learning_rate": 2.1125577488450233e-05, |
| "loss": 1.3483, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.5774884502309954, |
| "eval_accuracy": 0.6628488138251902, |
| "eval_loss": 1.284113883972168, |
| "eval_runtime": 52.2557, |
| "eval_samples_per_second": 117.25, |
| "eval_steps_per_second": 3.674, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.6037379252414952, |
| "grad_norm": 1.1153841018676758, |
| "learning_rate": 1.9813103737925244e-05, |
| "loss": 1.3389, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.629987400251995, |
| "grad_norm": 1.117702841758728, |
| "learning_rate": 1.8500629987400252e-05, |
| "loss": 1.3335, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.629987400251995, |
| "eval_accuracy": 0.6658040201846832, |
| "eval_loss": 1.2690930366516113, |
| "eval_runtime": 51.8646, |
| "eval_samples_per_second": 118.135, |
| "eval_steps_per_second": 3.702, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.6562368752624947, |
| "grad_norm": 1.1310999393463135, |
| "learning_rate": 1.7188156236875263e-05, |
| "loss": 1.3266, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.6824863502729945, |
| "grad_norm": 1.1650059223175049, |
| "learning_rate": 1.5875682486350274e-05, |
| "loss": 1.3166, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.6824863502729945, |
| "eval_accuracy": 0.6683760053772215, |
| "eval_loss": 1.2568169832229614, |
| "eval_runtime": 51.9065, |
| "eval_samples_per_second": 118.039, |
| "eval_steps_per_second": 3.699, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.7087358252834943, |
| "grad_norm": 1.0919125080108643, |
| "learning_rate": 1.4563208735825285e-05, |
| "loss": 1.3098, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.7349853002939941, |
| "grad_norm": 1.129116177558899, |
| "learning_rate": 1.3250734985300295e-05, |
| "loss": 1.307, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.7349853002939941, |
| "eval_accuracy": 0.6705609403819863, |
| "eval_loss": 1.2448893785476685, |
| "eval_runtime": 51.8742, |
| "eval_samples_per_second": 118.113, |
| "eval_steps_per_second": 3.701, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.7612347753044939, |
| "grad_norm": 1.131659984588623, |
| "learning_rate": 1.1938261234775306e-05, |
| "loss": 1.2994, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.7874842503149937, |
| "grad_norm": 1.100225567817688, |
| "learning_rate": 1.0625787484250315e-05, |
| "loss": 1.2959, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.7874842503149937, |
| "eval_accuracy": 0.6726351528680722, |
| "eval_loss": 1.234655499458313, |
| "eval_runtime": 52.0418, |
| "eval_samples_per_second": 117.732, |
| "eval_steps_per_second": 3.689, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.8137337253254935, |
| "grad_norm": 1.1060478687286377, |
| "learning_rate": 9.313313733725326e-06, |
| "loss": 1.2905, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.8399832003359933, |
| "grad_norm": 1.1289230585098267, |
| "learning_rate": 8.000839983200337e-06, |
| "loss": 1.286, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.8399832003359933, |
| "eval_accuracy": 0.6743912375411241, |
| "eval_loss": 1.2270058393478394, |
| "eval_runtime": 52.0522, |
| "eval_samples_per_second": 117.709, |
| "eval_steps_per_second": 3.689, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.8662326753464931, |
| "grad_norm": 1.1142446994781494, |
| "learning_rate": 6.6883662326753475e-06, |
| "loss": 1.2826, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.8924821503569929, |
| "grad_norm": 1.1280709505081177, |
| "learning_rate": 5.375892482150358e-06, |
| "loss": 1.2818, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.8924821503569929, |
| "eval_accuracy": 0.6755184055446775, |
| "eval_loss": 1.2210115194320679, |
| "eval_runtime": 52.3903, |
| "eval_samples_per_second": 116.949, |
| "eval_steps_per_second": 3.665, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.9187316253674926, |
| "grad_norm": 1.1283568143844604, |
| "learning_rate": 4.063418731625368e-06, |
| "loss": 1.2752, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.9449811003779924, |
| "grad_norm": 1.1261204481124878, |
| "learning_rate": 2.7509449811003783e-06, |
| "loss": 1.2701, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.9449811003779924, |
| "eval_accuracy": 0.6768737193720215, |
| "eval_loss": 1.2154804468154907, |
| "eval_runtime": 51.9422, |
| "eval_samples_per_second": 117.958, |
| "eval_steps_per_second": 3.696, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.9712305753884922, |
| "grad_norm": 1.1426098346710205, |
| "learning_rate": 1.4384712305753885e-06, |
| "loss": 1.2699, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.997480050398992, |
| "grad_norm": 1.1207588911056519, |
| "learning_rate": 1.25997480050399e-07, |
| "loss": 1.2691, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.997480050398992, |
| "eval_accuracy": 0.6775645385447583, |
| "eval_loss": 1.2121435403823853, |
| "eval_runtime": 51.8514, |
| "eval_samples_per_second": 118.165, |
| "eval_steps_per_second": 3.703, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 19048, |
| "total_flos": 3.18526483857408e+17, |
| "train_loss": 1.5567941711710962, |
| "train_runtime": 7364.253, |
| "train_samples_per_second": 82.768, |
| "train_steps_per_second": 2.587 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 19048, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.18526483857408e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|