| { |
| "best_global_step": 2000, |
| "best_metric": 0.924, |
| "best_model_checkpoint": "./qlora-bert-sentiment/checkpoint-2000", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.0022401809692383, |
| "learning_rate": 0.0001225, |
| "loss": 0.703, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.6413620710372925, |
| "learning_rate": 0.0002475, |
| "loss": 0.6735, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 6.632427215576172, |
| "learning_rate": 0.0003725, |
| "loss": 0.5469, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.8693019151687622, |
| "learning_rate": 0.0004975, |
| "loss": 0.6154, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 7.796047687530518, |
| "learning_rate": 0.0004990863240477266, |
| "loss": 0.3135, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.8750956654548645, |
| "learning_rate": 0.0004962773315386935, |
| "loss": 0.3788, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.438046455383301, |
| "learning_rate": 0.0004915940198303324, |
| "loss": 0.3203, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.4791859984397888, |
| "learning_rate": 0.00048507203175260474, |
| "loss": 0.2855, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 6.871181964874268, |
| "learning_rate": 0.0004767610035728662, |
| "loss": 0.3167, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.08755309879779816, |
| "learning_rate": 0.0004667241872339007, |
| "loss": 0.2732, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.43568190932273865, |
| "learning_rate": 0.00045503796896844307, |
| "loss": 0.3608, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 3.768226385116577, |
| "learning_rate": 0.00044179128795382493, |
| "loss": 0.2945, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 4.359898567199707, |
| "learning_rate": 0.00042708495943113224, |
| "loss": 0.2697, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.9498214721679688, |
| "learning_rate": 0.00041103090744034666, |
| "loss": 0.3245, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.7939767241477966, |
| "learning_rate": 0.0003937513130108197, |
| "loss": 0.2775, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 7.53210973739624, |
| "learning_rate": 0.00037537768428986434, |
| "loss": 0.333, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.3197941780090332, |
| "learning_rate": 0.0003560498556863475, |
| "loss": 0.2535, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 4.376391410827637, |
| "learning_rate": 0.0003359149236464041, |
| "loss": 0.2585, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 4.018824577331543, |
| "learning_rate": 0.00031512612716066215, |
| "loss": 0.2368, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.9961744546890259, |
| "learning_rate": 0.00029384168152299676, |
| "loss": 0.2533, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.9185, |
| "eval_loss": 0.27835872769355774, |
| "eval_runtime": 47.563, |
| "eval_samples_per_second": 42.049, |
| "eval_steps_per_second": 21.025, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 3.986208915710449, |
| "learning_rate": 0.00027222357421661044, |
| "loss": 0.2054, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 4.131945610046387, |
| "learning_rate": 0.0002504363320914746, |
| "loss": 0.1693, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 0.10492417216300964, |
| "learning_rate": 0.00022864576921565816, |
| "loss": 0.2037, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.286766916513443, |
| "learning_rate": 0.00020701772493014758, |
| "loss": 0.2305, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 4.297652721405029, |
| "learning_rate": 0.000185716801711326, |
| "loss": 0.1592, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 1.9070364236831665, |
| "learning_rate": 0.00016490511244673752, |
| "loss": 0.1998, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 0.04250560328364372, |
| "learning_rate": 0.00014474104665812727, |
| "loss": 0.1699, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.16112375259399414, |
| "learning_rate": 0.00012537806506154246, |
| "loss": 0.2727, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 6.064806938171387, |
| "learning_rate": 0.000106963531638621, |
| "loss": 0.2407, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 1.4757925271987915, |
| "learning_rate": 8.963759210771053e-05, |
| "loss": 0.1953, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 0.12172006070613861, |
| "learning_rate": 7.353210733032975e-05, |
| "loss": 0.1397, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 1.6855131387710571, |
| "learning_rate": 5.8769649770392066e-05, |
| "loss": 0.2656, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 9.262821197509766, |
| "learning_rate": 4.54625706437441e-05, |
| "loss": 0.1447, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.10529584437608719, |
| "learning_rate": 3.3712144857573926e-05, |
| "loss": 0.1424, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 8.399650573730469, |
| "learning_rate": 2.360780024721515e-05, |
| "loss": 0.2099, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.10371621698141098, |
| "learning_rate": 1.5226436976322727e-05, |
| "loss": 0.1819, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 0.38396862149238586, |
| "learning_rate": 8.631842280193758e-06, |
| "loss": 0.2066, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.6245690584182739, |
| "learning_rate": 3.874205006390852e-06, |
| "loss": 0.1865, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 7.9351487159729, |
| "learning_rate": 9.897336473076167e-07, |
| "loss": 0.2503, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 4.218188285827637, |
| "learning_rate": 3.807716780768189e-10, |
| "loss": 0.165, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.924, |
| "eval_loss": 0.268412709236145, |
| "eval_runtime": 47.5747, |
| "eval_samples_per_second": 42.039, |
| "eval_steps_per_second": 21.02, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 2000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4231595753472000.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|