{
  "best_global_step": 4000,
  "best_metric": 2.539825201034546,
  "best_model_checkpoint": "./qlora_gpt2/checkpoint-4000",
  "epoch": 2.2785531187695813,
  "eval_steps": 500,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05696382796923953,
      "grad_norm": 0.22663557529449463,
      "learning_rate": 3.960000000000001e-05,
      "loss": 3.4511,
      "step": 100
    },
    {
      "epoch": 0.11392765593847906,
      "grad_norm": 0.2373093217611313,
      "learning_rate": 7.960000000000001e-05,
      "loss": 3.068,
      "step": 200
    },
    {
      "epoch": 0.1708914839077186,
      "grad_norm": 0.25586986541748047,
      "learning_rate": 0.00011960000000000001,
      "loss": 2.9652,
      "step": 300
    },
    {
      "epoch": 0.22785531187695812,
      "grad_norm": 0.24306726455688477,
      "learning_rate": 0.0001596,
      "loss": 2.9145,
      "step": 400
    },
    {
      "epoch": 0.28481913984619767,
      "grad_norm": 0.2453053742647171,
      "learning_rate": 0.0001996,
      "loss": 2.8514,
      "step": 500
    },
    {
      "epoch": 0.28481913984619767,
      "eval_loss": 2.73831844329834,
      "eval_runtime": 780.3351,
      "eval_samples_per_second": 15.997,
      "eval_steps_per_second": 1.001,
      "step": 500
    },
    {
      "epoch": 0.3417829678154372,
      "grad_norm": 0.2287718951702118,
      "learning_rate": 0.00019584731543624163,
      "loss": 2.8526,
      "step": 600
    },
    {
      "epoch": 0.39874679578467676,
      "grad_norm": 0.23401789367198944,
      "learning_rate": 0.0001916526845637584,
      "loss": 2.8242,
      "step": 700
    },
    {
      "epoch": 0.45571062375391624,
      "grad_norm": 0.2420588731765747,
      "learning_rate": 0.0001874580536912752,
      "loss": 2.8295,
      "step": 800
    },
    {
      "epoch": 0.5126744517231558,
      "grad_norm": 0.24802158772945404,
      "learning_rate": 0.00018326342281879197,
      "loss": 2.7982,
      "step": 900
    },
    {
      "epoch": 0.5696382796923953,
      "grad_norm": 0.24083346128463745,
      "learning_rate": 0.00017906879194630872,
      "loss": 2.7647,
      "step": 1000
    },
    {
      "epoch": 0.5696382796923953,
      "eval_loss": 2.657050371170044,
      "eval_runtime": 780.7988,
      "eval_samples_per_second": 15.987,
      "eval_steps_per_second": 1.0,
      "step": 1000
    },
    {
      "epoch": 0.6266021076616348,
      "grad_norm": 0.23890583217144012,
      "learning_rate": 0.0001748741610738255,
      "loss": 2.7541,
      "step": 1100
    },
    {
      "epoch": 0.6835659356308744,
      "grad_norm": 0.2339860498905182,
      "learning_rate": 0.00017067953020134227,
      "loss": 2.7423,
      "step": 1200
    },
    {
      "epoch": 0.7405297636001139,
      "grad_norm": 0.224105566740036,
      "learning_rate": 0.00016648489932885908,
      "loss": 2.7567,
      "step": 1300
    },
    {
      "epoch": 0.7974935915693535,
      "grad_norm": 0.21676279604434967,
      "learning_rate": 0.00016229026845637586,
      "loss": 2.7369,
      "step": 1400
    },
    {
      "epoch": 0.854457419538593,
      "grad_norm": 0.22006016969680786,
      "learning_rate": 0.00015809563758389263,
      "loss": 2.7527,
      "step": 1500
    },
    {
      "epoch": 0.854457419538593,
      "eval_loss": 2.614635467529297,
      "eval_runtime": 780.923,
      "eval_samples_per_second": 15.985,
      "eval_steps_per_second": 1.0,
      "step": 1500
    },
    {
      "epoch": 0.9114212475078325,
      "grad_norm": 0.2208578735589981,
      "learning_rate": 0.0001539010067114094,
      "loss": 2.7038,
      "step": 1600
    },
    {
      "epoch": 0.9683850754770721,
      "grad_norm": 0.245719775557518,
      "learning_rate": 0.00014970637583892616,
      "loss": 2.7085,
      "step": 1700
    },
    {
      "epoch": 1.0256337225861578,
      "grad_norm": 0.22791120409965515,
      "learning_rate": 0.00014551174496644294,
      "loss": 2.7286,
      "step": 1800
    },
    {
      "epoch": 1.0825975505553973,
      "grad_norm": 0.2143191248178482,
      "learning_rate": 0.00014131711409395975,
      "loss": 2.6748,
      "step": 1900
    },
    {
      "epoch": 1.1395613785246368,
      "grad_norm": 0.2522701323032379,
      "learning_rate": 0.00013712248322147652,
      "loss": 2.6871,
      "step": 2000
    },
    {
      "epoch": 1.1395613785246368,
      "eval_loss": 2.5919432640075684,
      "eval_runtime": 777.5861,
      "eval_samples_per_second": 16.054,
      "eval_steps_per_second": 1.004,
      "step": 2000
    },
    {
      "epoch": 1.1965252064938765,
      "grad_norm": 0.21982581913471222,
      "learning_rate": 0.0001329278523489933,
      "loss": 2.644,
      "step": 2100
    },
    {
      "epoch": 1.253489034463116,
      "grad_norm": 0.19931554794311523,
      "learning_rate": 0.00012873322147651008,
      "loss": 2.6782,
      "step": 2200
    },
    {
      "epoch": 1.3104528624323555,
      "grad_norm": 0.22992636263370514,
      "learning_rate": 0.00012453859060402686,
      "loss": 2.6368,
      "step": 2300
    },
    {
      "epoch": 1.367416690401595,
      "grad_norm": 0.257996529340744,
      "learning_rate": 0.00012034395973154362,
      "loss": 2.6744,
      "step": 2400
    },
    {
      "epoch": 1.4243805183708345,
      "grad_norm": 0.23609480261802673,
      "learning_rate": 0.0001161493288590604,
      "loss": 2.662,
      "step": 2500
    },
    {
      "epoch": 1.4243805183708345,
      "eval_loss": 2.572186231613159,
      "eval_runtime": 779.8917,
      "eval_samples_per_second": 16.006,
      "eval_steps_per_second": 1.001,
      "step": 2500
    },
    {
      "epoch": 1.481344346340074,
      "grad_norm": 0.22605575621128082,
      "learning_rate": 0.00011195469798657718,
      "loss": 2.6789,
      "step": 2600
    },
    {
      "epoch": 1.5383081743093134,
      "grad_norm": 0.2314230501651764,
      "learning_rate": 0.00010776006711409397,
      "loss": 2.6356,
      "step": 2700
    },
    {
      "epoch": 1.5952720022785531,
      "grad_norm": 0.23274995386600494,
      "learning_rate": 0.00010356543624161075,
      "loss": 2.661,
      "step": 2800
    },
    {
      "epoch": 1.6522358302477926,
      "grad_norm": 0.20582696795463562,
      "learning_rate": 9.937080536912751e-05,
      "loss": 2.643,
      "step": 2900
    },
    {
      "epoch": 1.7091996582170323,
      "grad_norm": 0.2208004742860794,
      "learning_rate": 9.51761744966443e-05,
      "loss": 2.6328,
      "step": 3000
    },
    {
      "epoch": 1.7091996582170323,
      "eval_loss": 2.558286190032959,
      "eval_runtime": 779.5251,
      "eval_samples_per_second": 16.014,
      "eval_steps_per_second": 1.002,
      "step": 3000
    },
    {
      "epoch": 1.7661634861862718,
      "grad_norm": 0.23239333927631378,
      "learning_rate": 9.098154362416108e-05,
      "loss": 2.6733,
      "step": 3100
    },
    {
      "epoch": 1.8231273141555113,
      "grad_norm": 0.2152535766363144,
      "learning_rate": 8.678691275167785e-05,
      "loss": 2.6505,
      "step": 3200
    },
    {
      "epoch": 1.8800911421247508,
      "grad_norm": 0.21094359457492828,
      "learning_rate": 8.259228187919464e-05,
      "loss": 2.6153,
      "step": 3300
    },
    {
      "epoch": 1.9370549700939903,
      "grad_norm": 0.20640310645103455,
      "learning_rate": 7.839765100671142e-05,
      "loss": 2.6411,
      "step": 3400
    },
    {
      "epoch": 1.9940187980632298,
      "grad_norm": 0.2932434678077698,
      "learning_rate": 7.42030201342282e-05,
      "loss": 2.6608,
      "step": 3500
    },
    {
      "epoch": 1.9940187980632298,
      "eval_loss": 2.5474600791931152,
      "eval_runtime": 779.7457,
      "eval_samples_per_second": 16.009,
      "eval_steps_per_second": 1.002,
      "step": 3500
    },
    {
      "epoch": 2.0506978068926234,
      "grad_norm": 0.22819621860980988,
      "learning_rate": 7.000838926174496e-05,
      "loss": 2.6262,
      "step": 3600
    },
    {
      "epoch": 2.107661634861863,
      "grad_norm": 0.23161746561527252,
      "learning_rate": 6.581375838926175e-05,
      "loss": 2.6318,
      "step": 3700
    },
    {
      "epoch": 2.1646254628311024,
      "grad_norm": 0.26975372433662415,
      "learning_rate": 6.161912751677853e-05,
      "loss": 2.6433,
      "step": 3800
    },
    {
      "epoch": 2.221589290800342,
      "grad_norm": 0.23308990895748138,
      "learning_rate": 5.74244966442953e-05,
      "loss": 2.6375,
      "step": 3900
    },
    {
      "epoch": 2.2785531187695813,
      "grad_norm": 0.23112072050571442,
      "learning_rate": 5.322986577181208e-05,
      "loss": 2.6248,
      "step": 4000
    },
    {
      "epoch": 2.2785531187695813,
      "eval_loss": 2.539825201034546,
      "eval_runtime": 779.2483,
      "eval_samples_per_second": 16.019,
      "eval_steps_per_second": 1.002,
      "step": 4000
    }
  ],
  "logging_steps": 100,
  "max_steps": 5268,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.439796133185454e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}