{ "best_metric": 0.03272353485226631, "best_model_checkpoint": "./codellama_sql_model_forestry/checkpoint-400", "epoch": 2.0, "eval_steps": 100, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05006257822277847, "grad_norm": 7.185255527496338, "learning_rate": 0.0001979899497487437, "loss": 32.456, "step": 10 }, { "epoch": 0.10012515644555695, "grad_norm": 0.1251622438430786, "learning_rate": 0.0001946398659966499, "loss": 0.4541, "step": 20 }, { "epoch": 0.15018773466833543, "grad_norm": 0.20880717039108276, "learning_rate": 0.0001912897822445561, "loss": 0.3029, "step": 30 }, { "epoch": 0.2002503128911139, "grad_norm": 0.16143666207790375, "learning_rate": 0.0001879396984924623, "loss": 0.2429, "step": 40 }, { "epoch": 0.2503128911138924, "grad_norm": 0.13474377989768982, "learning_rate": 0.0001845896147403685, "loss": 0.2332, "step": 50 }, { "epoch": 0.30037546933667086, "grad_norm": 0.17662972211837769, "learning_rate": 0.0001812395309882747, "loss": 0.2085, "step": 60 }, { "epoch": 0.3504380475594493, "grad_norm": 0.17377126216888428, "learning_rate": 0.0001778894472361809, "loss": 0.1961, "step": 70 }, { "epoch": 0.4005006257822278, "grad_norm": 0.1938253790140152, "learning_rate": 0.0001745393634840871, "loss": 0.2032, "step": 80 }, { "epoch": 0.45056320400500627, "grad_norm": 0.1416705697774887, "learning_rate": 0.0001711892797319933, "loss": 0.198, "step": 90 }, { "epoch": 0.5006257822277848, "grad_norm": 0.16024866700172424, "learning_rate": 0.0001678391959798995, "loss": 0.185, "step": 100 }, { "epoch": 0.5006257822277848, "eval_loss": 0.042159534990787506, "eval_runtime": 125.0369, "eval_samples_per_second": 0.344, "eval_steps_per_second": 0.344, "step": 100 }, { "epoch": 0.5506883604505632, "grad_norm": 0.1869831383228302, "learning_rate": 0.0001644891122278057, "loss": 0.1921, "step": 110 }, { "epoch": 0.6007509386733417, "grad_norm": 0.15546876192092896, "learning_rate": 0.0001611390284757119, "loss": 0.1741, "step": 120 }, { "epoch": 0.6508135168961201, "grad_norm": 0.1712736338376999, "learning_rate": 0.0001577889447236181, "loss": 0.174, "step": 130 }, { "epoch": 0.7008760951188986, "grad_norm": 0.1676545888185501, "learning_rate": 0.0001544388609715243, "loss": 0.1571, "step": 140 }, { "epoch": 0.7509386733416771, "grad_norm": 0.17751628160476685, "learning_rate": 0.00015108877721943048, "loss": 0.1778, "step": 150 }, { "epoch": 0.8010012515644556, "grad_norm": 0.1387346237897873, "learning_rate": 0.00014773869346733668, "loss": 0.1532, "step": 160 }, { "epoch": 0.851063829787234, "grad_norm": 0.17603643238544464, "learning_rate": 0.00014438860971524288, "loss": 0.1577, "step": 170 }, { "epoch": 0.9011264080100125, "grad_norm": 0.1522763967514038, "learning_rate": 0.00014103852596314908, "loss": 0.1501, "step": 180 }, { "epoch": 0.951188986232791, "grad_norm": 0.14780306816101074, "learning_rate": 0.00013768844221105528, "loss": 0.1499, "step": 190 }, { "epoch": 1.0, "grad_norm": 0.11175049841403961, "learning_rate": 0.00013433835845896147, "loss": 0.1509, "step": 200 }, { "epoch": 1.0, "eval_loss": 0.037981580942869186, "eval_runtime": 125.2439, "eval_samples_per_second": 0.343, "eval_steps_per_second": 0.343, "step": 200 }, { "epoch": 1.0500625782227784, "grad_norm": 0.15822191536426544, "learning_rate": 0.00013098827470686767, "loss": 0.1307, "step": 210 }, { "epoch": 1.100125156445557, "grad_norm": 0.19546131789684296, "learning_rate": 0.00012763819095477387, "loss": 0.1189, "step": 220 }, { "epoch": 1.1501877346683353, "grad_norm": 0.17685039341449738, "learning_rate": 0.00012428810720268007, "loss": 0.1296, "step": 230 }, { "epoch": 1.200250312891114, "grad_norm": 0.17825502157211304, "learning_rate": 0.00012093802345058627, "loss": 0.1291, "step": 240 }, { "epoch": 1.2503128911138923, "grad_norm": 0.19515497982501984, "learning_rate": 0.00011758793969849247, "loss": 0.1283, "step": 250 }, { "epoch": 1.300375469336671, "grad_norm": 0.22185975313186646, "learning_rate": 0.00011423785594639866, "loss": 0.1358, "step": 260 }, { "epoch": 1.3504380475594493, "grad_norm": 0.18015995621681213, "learning_rate": 0.00011088777219430486, "loss": 0.1284, "step": 270 }, { "epoch": 1.400500625782228, "grad_norm": 0.2559189796447754, "learning_rate": 0.00010753768844221106, "loss": 0.1188, "step": 280 }, { "epoch": 1.4505632040050063, "grad_norm": 0.17111122608184814, "learning_rate": 0.00010418760469011726, "loss": 0.1243, "step": 290 }, { "epoch": 1.5006257822277846, "grad_norm": 0.19879887998104095, "learning_rate": 0.00010083752093802346, "loss": 0.1364, "step": 300 }, { "epoch": 1.5006257822277846, "eval_loss": 0.03517143055796623, "eval_runtime": 124.8471, "eval_samples_per_second": 0.344, "eval_steps_per_second": 0.344, "step": 300 }, { "epoch": 1.5506883604505632, "grad_norm": 0.21239443123340607, "learning_rate": 9.748743718592965e-05, "loss": 0.1198, "step": 310 }, { "epoch": 1.6007509386733418, "grad_norm": 0.21676813066005707, "learning_rate": 9.413735343383585e-05, "loss": 0.1309, "step": 320 }, { "epoch": 1.65081351689612, "grad_norm": 0.2111431062221527, "learning_rate": 9.078726968174205e-05, "loss": 0.123, "step": 330 }, { "epoch": 1.7008760951188986, "grad_norm": 0.17868830263614655, "learning_rate": 8.743718592964825e-05, "loss": 0.1133, "step": 340 }, { "epoch": 1.7509386733416772, "grad_norm": 0.20656318962574005, "learning_rate": 8.408710217755445e-05, "loss": 0.119, "step": 350 }, { "epoch": 1.8010012515644556, "grad_norm": 0.25555238127708435, "learning_rate": 8.073701842546064e-05, "loss": 0.1142, "step": 360 }, { "epoch": 1.851063829787234, "grad_norm": 0.17207714915275574, "learning_rate": 7.738693467336684e-05, "loss": 0.1085, "step": 370 }, { "epoch": 1.9011264080100125, "grad_norm": 0.16670793294906616, "learning_rate": 7.403685092127304e-05, "loss": 0.11, "step": 380 }, { "epoch": 1.9511889862327911, "grad_norm": 0.20777879655361176, "learning_rate": 7.068676716917924e-05, "loss": 0.1094, "step": 390 }, { "epoch": 2.0, "grad_norm": 0.20488321781158447, "learning_rate": 6.733668341708544e-05, "loss": 0.1182, "step": 400 }, { "epoch": 2.0, "eval_loss": 0.03272353485226631, "eval_runtime": 125.3359, "eval_samples_per_second": 0.343, "eval_steps_per_second": 0.343, "step": 400 } ], "logging_steps": 10, "max_steps": 597, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.3052956270972109e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }