| { |
| "best_global_step": 7000, |
| "best_metric": 0.9318026741133476, |
| "best_model_checkpoint": "./results/checkpoint-7000", |
| "epoch": 0.2063344681729083, |
| "eval_steps": 500, |
| "global_step": 8000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0025791808521613536, |
| "grad_norm": 8.55178451538086, |
| "learning_rate": 3.96e-06, |
| "loss": 0.6948, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.005158361704322707, |
| "grad_norm": 6.15341329574585, |
| "learning_rate": 7.960000000000002e-06, |
| "loss": 0.6256, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.00773754255648406, |
| "grad_norm": 7.876453876495361, |
| "learning_rate": 1.196e-05, |
| "loss": 0.4996, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.010316723408645414, |
| "grad_norm": 25.46845817565918, |
| "learning_rate": 1.5960000000000003e-05, |
| "loss": 0.4267, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.012895904260806768, |
| "grad_norm": 12.137995719909668, |
| "learning_rate": 1.9960000000000002e-05, |
| "loss": 0.3932, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.012895904260806768, |
| "eval_accuracy": 0.8497975290810141, |
| "eval_f1": 0.8492073290894184, |
| "eval_loss": 0.36694180965423584, |
| "eval_precision": 0.8566847895515343, |
| "eval_recall": 0.8497975290810141, |
| "eval_runtime": 81.3998, |
| "eval_samples_per_second": 952.607, |
| "eval_steps_per_second": 59.546, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.01547508511296812, |
| "grad_norm": 9.444395065307617, |
| "learning_rate": 1.99829039165573e-05, |
| "loss": 0.3438, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.018054265965129476, |
| "grad_norm": 12.698025703430176, |
| "learning_rate": 1.9965635145403055e-05, |
| "loss": 0.3897, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.02063344681729083, |
| "grad_norm": 7.648699760437012, |
| "learning_rate": 1.994836637424881e-05, |
| "loss": 0.3197, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.02321262766945218, |
| "grad_norm": 11.770590782165527, |
| "learning_rate": 1.9931097603094565e-05, |
| "loss": 0.2641, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.025791808521613536, |
| "grad_norm": 1.2174805402755737, |
| "learning_rate": 1.991382883194032e-05, |
| "loss": 0.3093, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.025791808521613536, |
| "eval_accuracy": 0.8805163653245983, |
| "eval_f1": 0.8798423409034712, |
| "eval_loss": 0.29810118675231934, |
| "eval_precision": 0.8908826043373803, |
| "eval_recall": 0.8805163653245983, |
| "eval_runtime": 82.3661, |
| "eval_samples_per_second": 941.431, |
| "eval_steps_per_second": 58.847, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.02837098937377489, |
| "grad_norm": 35.91494369506836, |
| "learning_rate": 1.9896560060786075e-05, |
| "loss": 0.2701, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.03095017022593624, |
| "grad_norm": 10.724688529968262, |
| "learning_rate": 1.9879291289631834e-05, |
| "loss": 0.2723, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.0335293510780976, |
| "grad_norm": 11.826756477355957, |
| "learning_rate": 1.9862022518477585e-05, |
| "loss": 0.2669, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.03610853193025895, |
| "grad_norm": 13.958097457885742, |
| "learning_rate": 1.984475374732334e-05, |
| "loss": 0.2563, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.0386877127824203, |
| "grad_norm": 32.344303131103516, |
| "learning_rate": 1.98274849761691e-05, |
| "loss": 0.2717, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.0386877127824203, |
| "eval_accuracy": 0.9098424079853499, |
| "eval_f1": 0.9098439407901873, |
| "eval_loss": 0.2351153939962387, |
| "eval_precision": 0.9100045564108226, |
| "eval_recall": 0.9098424079853499, |
| "eval_runtime": 80.9696, |
| "eval_samples_per_second": 957.669, |
| "eval_steps_per_second": 59.862, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.04126689363458166, |
| "grad_norm": 41.87895584106445, |
| "learning_rate": 1.9810216205014854e-05, |
| "loss": 0.2775, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.04384607448674301, |
| "grad_norm": 9.029143333435059, |
| "learning_rate": 1.9792947433860606e-05, |
| "loss": 0.2747, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.04642525533890436, |
| "grad_norm": 0.5453509092330933, |
| "learning_rate": 1.9775678662706365e-05, |
| "loss": 0.2333, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.04900443619106572, |
| "grad_norm": 14.877126693725586, |
| "learning_rate": 1.975840989155212e-05, |
| "loss": 0.2335, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.05158361704322707, |
| "grad_norm": 9.152324676513672, |
| "learning_rate": 1.974114112039787e-05, |
| "loss": 0.2501, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.05158361704322707, |
| "eval_accuracy": 0.915400686079802, |
| "eval_f1": 0.9153786857019529, |
| "eval_loss": 0.24714471399784088, |
| "eval_precision": 0.9155981210913413, |
| "eval_recall": 0.915400686079802, |
| "eval_runtime": 80.7839, |
| "eval_samples_per_second": 959.869, |
| "eval_steps_per_second": 60.0, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.05416279789538842, |
| "grad_norm": 3.9494690895080566, |
| "learning_rate": 1.972387234924363e-05, |
| "loss": 0.2432, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.05674197874754978, |
| "grad_norm": 7.518190383911133, |
| "learning_rate": 1.9706603578089385e-05, |
| "loss": 0.2179, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.05932115959971113, |
| "grad_norm": 55.069461822509766, |
| "learning_rate": 1.968933480693514e-05, |
| "loss": 0.2603, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.06190034045187248, |
| "grad_norm": 21.235607147216797, |
| "learning_rate": 1.9672066035780896e-05, |
| "loss": 0.2363, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.06447952130403384, |
| "grad_norm": 30.956710815429688, |
| "learning_rate": 1.965479726462665e-05, |
| "loss": 0.264, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.06447952130403384, |
| "eval_accuracy": 0.9174769802171726, |
| "eval_f1": 0.9174732815674826, |
| "eval_loss": 0.22409705817699432, |
| "eval_precision": 0.9178450257324412, |
| "eval_recall": 0.9174769802171726, |
| "eval_runtime": 82.4939, |
| "eval_samples_per_second": 939.972, |
| "eval_steps_per_second": 58.756, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.0670587021561952, |
| "grad_norm": 3.0290513038635254, |
| "learning_rate": 1.9637528493472406e-05, |
| "loss": 0.2388, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.06963788300835655, |
| "grad_norm": 4.662931442260742, |
| "learning_rate": 1.962025972231816e-05, |
| "loss": 0.2132, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.0722170638605179, |
| "grad_norm": 31.94866180419922, |
| "learning_rate": 1.9602990951163916e-05, |
| "loss": 0.2576, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.07479624471267925, |
| "grad_norm": 3.5957560539245605, |
| "learning_rate": 1.958572218000967e-05, |
| "loss": 0.2404, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.0773754255648406, |
| "grad_norm": 6.48631477355957, |
| "learning_rate": 1.9568453408855427e-05, |
| "loss": 0.2514, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.0773754255648406, |
| "eval_accuracy": 0.9169998194526837, |
| "eval_f1": 0.9170017211060641, |
| "eval_loss": 0.21086551249027252, |
| "eval_precision": 0.9171348873594787, |
| "eval_recall": 0.9169998194526837, |
| "eval_runtime": 80.1992, |
| "eval_samples_per_second": 966.867, |
| "eval_steps_per_second": 60.437, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.07995460641700196, |
| "grad_norm": 5.789911270141602, |
| "learning_rate": 1.9551184637701185e-05, |
| "loss": 0.2138, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.08253378726916331, |
| "grad_norm": 8.613458633422852, |
| "learning_rate": 1.9533915866546937e-05, |
| "loss": 0.2323, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.08511296812132467, |
| "grad_norm": 5.5360822677612305, |
| "learning_rate": 1.9516647095392692e-05, |
| "loss": 0.2474, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.08769214897348603, |
| "grad_norm": 15.3199462890625, |
| "learning_rate": 1.949937832423845e-05, |
| "loss": 0.1964, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.09027132982564738, |
| "grad_norm": 0.23756052553653717, |
| "learning_rate": 1.9482109553084202e-05, |
| "loss": 0.2048, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.09027132982564738, |
| "eval_accuracy": 0.9212555776224498, |
| "eval_f1": 0.9212388888583593, |
| "eval_loss": 0.217011496424675, |
| "eval_precision": 0.9220491698721403, |
| "eval_recall": 0.9212555776224498, |
| "eval_runtime": 82.2451, |
| "eval_samples_per_second": 942.816, |
| "eval_steps_per_second": 58.934, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.09285051067780872, |
| "grad_norm": 3.4589858055114746, |
| "learning_rate": 1.9464840781929957e-05, |
| "loss": 0.2335, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.09542969152997008, |
| "grad_norm": 6.5932111740112305, |
| "learning_rate": 1.9447572010775716e-05, |
| "loss": 0.2239, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.09800887238213143, |
| "grad_norm": 2.6831107139587402, |
| "learning_rate": 1.943030323962147e-05, |
| "loss": 0.2049, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.10058805323429279, |
| "grad_norm": 26.725677490234375, |
| "learning_rate": 1.9413034468467223e-05, |
| "loss": 0.2404, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.10316723408645415, |
| "grad_norm": 26.966718673706055, |
| "learning_rate": 1.939576569731298e-05, |
| "loss": 0.241, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.10316723408645415, |
| "eval_accuracy": 0.9104227386448634, |
| "eval_f1": 0.9102303033501772, |
| "eval_loss": 0.22579680383205414, |
| "eval_precision": 0.9150459252068764, |
| "eval_recall": 0.9104227386448634, |
| "eval_runtime": 81.0753, |
| "eval_samples_per_second": 956.419, |
| "eval_steps_per_second": 59.784, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.1057464149386155, |
| "grad_norm": 17.290939331054688, |
| "learning_rate": 1.9378496926158737e-05, |
| "loss": 0.2298, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.10832559579077684, |
| "grad_norm": 8.844673156738281, |
| "learning_rate": 1.9361228155004492e-05, |
| "loss": 0.224, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.1109047766429382, |
| "grad_norm": 2.0837674140930176, |
| "learning_rate": 1.9343959383850247e-05, |
| "loss": 0.2367, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.11348395749509955, |
| "grad_norm": 7.639578819274902, |
| "learning_rate": 1.9326690612696002e-05, |
| "loss": 0.2194, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.11606313834726091, |
| "grad_norm": 9.39929485321045, |
| "learning_rate": 1.9309421841541757e-05, |
| "loss": 0.2297, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.11606313834726091, |
| "eval_accuracy": 0.9185473678780531, |
| "eval_f1": 0.918488362019573, |
| "eval_loss": 0.24766957759857178, |
| "eval_precision": 0.9204437423697496, |
| "eval_recall": 0.9185473678780531, |
| "eval_runtime": 80.6546, |
| "eval_samples_per_second": 961.408, |
| "eval_steps_per_second": 60.096, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.11864231919942227, |
| "grad_norm": 63.414676666259766, |
| "learning_rate": 1.9292153070387512e-05, |
| "loss": 0.2067, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.12122150005158362, |
| "grad_norm": 27.895687103271484, |
| "learning_rate": 1.9274884299233268e-05, |
| "loss": 0.1975, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.12380068090374496, |
| "grad_norm": 5.536584377288818, |
| "learning_rate": 1.9257615528079023e-05, |
| "loss": 0.2141, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.12637986175590632, |
| "grad_norm": 18.6892147064209, |
| "learning_rate": 1.9240346756924778e-05, |
| "loss": 0.2549, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.12895904260806768, |
| "grad_norm": 3.705787420272827, |
| "learning_rate": 1.9223077985770536e-05, |
| "loss": 0.2187, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.12895904260806768, |
| "eval_accuracy": 0.9244409481313353, |
| "eval_f1": 0.9243951823390982, |
| "eval_loss": 0.22254011034965515, |
| "eval_precision": 0.9250643288957023, |
| "eval_recall": 0.9244409481313353, |
| "eval_runtime": 84.6134, |
| "eval_samples_per_second": 916.427, |
| "eval_steps_per_second": 57.284, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.13153822346022903, |
| "grad_norm": 10.738173484802246, |
| "learning_rate": 1.9205809214616288e-05, |
| "loss": 0.2143, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.1341174043123904, |
| "grad_norm": 7.39179801940918, |
| "learning_rate": 1.9188540443462043e-05, |
| "loss": 0.205, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.13669658516455174, |
| "grad_norm": 14.402508735656738, |
| "learning_rate": 1.9171271672307802e-05, |
| "loss": 0.2027, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.1392757660167131, |
| "grad_norm": 8.077102661132812, |
| "learning_rate": 1.9154002901153554e-05, |
| "loss": 0.1909, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.14185494686887445, |
| "grad_norm": 2.036444664001465, |
| "learning_rate": 1.913673412999931e-05, |
| "loss": 0.2285, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.14185494686887445, |
| "eval_accuracy": 0.9220164555982564, |
| "eval_f1": 0.9220066809051832, |
| "eval_loss": 0.2105225920677185, |
| "eval_precision": 0.9220853946451525, |
| "eval_recall": 0.9220164555982564, |
| "eval_runtime": 85.408, |
| "eval_samples_per_second": 907.901, |
| "eval_steps_per_second": 56.751, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.1444341277210358, |
| "grad_norm": 3.172539472579956, |
| "learning_rate": 1.9119465358845067e-05, |
| "loss": 0.1839, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.14701330857319717, |
| "grad_norm": 4.246190547943115, |
| "learning_rate": 1.9102196587690823e-05, |
| "loss": 0.2248, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.1495924894253585, |
| "grad_norm": 9.676543235778809, |
| "learning_rate": 1.9084927816536574e-05, |
| "loss": 0.2003, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.15217167027751985, |
| "grad_norm": 14.093317031860352, |
| "learning_rate": 1.9067659045382333e-05, |
| "loss": 0.2283, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.1547508511296812, |
| "grad_norm": 6.108678817749023, |
| "learning_rate": 1.9050390274228088e-05, |
| "loss": 0.2001, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.1547508511296812, |
| "eval_accuracy": 0.9288514611436383, |
| "eval_f1": 0.9288285810713337, |
| "eval_loss": 0.19547687470912933, |
| "eval_precision": 0.9291294558172835, |
| "eval_recall": 0.9288514611436383, |
| "eval_runtime": 85.1832, |
| "eval_samples_per_second": 910.297, |
| "eval_steps_per_second": 56.901, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.15733003198184256, |
| "grad_norm": 25.980791091918945, |
| "learning_rate": 1.9033121503073843e-05, |
| "loss": 0.2045, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.15990921283400392, |
| "grad_norm": 3.697967767715454, |
| "learning_rate": 1.90158527319196e-05, |
| "loss": 0.1921, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.16248839368616527, |
| "grad_norm": 6.734536647796631, |
| "learning_rate": 1.8998583960765353e-05, |
| "loss": 0.2398, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.16506757453832663, |
| "grad_norm": 4.171863555908203, |
| "learning_rate": 1.898131518961111e-05, |
| "loss": 0.1851, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.16764675539048798, |
| "grad_norm": 8.22290325164795, |
| "learning_rate": 1.8964046418456864e-05, |
| "loss": 0.2048, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.16764675539048798, |
| "eval_accuracy": 0.9237316551030409, |
| "eval_f1": 0.9237108101584136, |
| "eval_loss": 0.21942158043384552, |
| "eval_precision": 0.9246712025171661, |
| "eval_recall": 0.9237316551030409, |
| "eval_runtime": 87.4195, |
| "eval_samples_per_second": 887.01, |
| "eval_steps_per_second": 55.445, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.17022593624264934, |
| "grad_norm": 1.4064490795135498, |
| "learning_rate": 1.894677764730262e-05, |
| "loss": 0.2192, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.1728051170948107, |
| "grad_norm": 0.8423879146575928, |
| "learning_rate": 1.8929508876148374e-05, |
| "loss": 0.2296, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.17538429794697205, |
| "grad_norm": 2.1388964653015137, |
| "learning_rate": 1.891224010499413e-05, |
| "loss": 0.2397, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.1779634787991334, |
| "grad_norm": 6.909358501434326, |
| "learning_rate": 1.8894971333839888e-05, |
| "loss": 0.1902, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.18054265965129476, |
| "grad_norm": 1.044767141342163, |
| "learning_rate": 1.887770256268564e-05, |
| "loss": 0.1931, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.18054265965129476, |
| "eval_accuracy": 0.9318046993887183, |
| "eval_f1": 0.9318026741133476, |
| "eval_loss": 0.18849702179431915, |
| "eval_precision": 0.9318098201446341, |
| "eval_recall": 0.9318046993887183, |
| "eval_runtime": 85.7797, |
| "eval_samples_per_second": 903.967, |
| "eval_steps_per_second": 56.505, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.1831218405034561, |
| "grad_norm": 9.8370943069458, |
| "learning_rate": 1.8860433791531395e-05, |
| "loss": 0.2179, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.18570102135561745, |
| "grad_norm": 3.937364101409912, |
| "learning_rate": 1.8843165020377153e-05, |
| "loss": 0.2017, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.1882802022077788, |
| "grad_norm": 1.7348313331604004, |
| "learning_rate": 1.8825896249222905e-05, |
| "loss": 0.2098, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.19085938305994016, |
| "grad_norm": 13.738706588745117, |
| "learning_rate": 1.880862747806866e-05, |
| "loss": 0.2176, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.1934385639121015, |
| "grad_norm": 7.888184547424316, |
| "learning_rate": 1.879135870691442e-05, |
| "loss": 0.2052, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.1934385639121015, |
| "eval_accuracy": 0.9288256686698821, |
| "eval_f1": 0.9287744301894623, |
| "eval_loss": 0.19062775373458862, |
| "eval_precision": 0.9296090951021123, |
| "eval_recall": 0.9288256686698821, |
| "eval_runtime": 84.5191, |
| "eval_samples_per_second": 917.45, |
| "eval_steps_per_second": 57.348, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.19601774476426287, |
| "grad_norm": 8.46711254119873, |
| "learning_rate": 1.8774089935760174e-05, |
| "loss": 0.2069, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.19859692561642422, |
| "grad_norm": 0.5938565731048584, |
| "learning_rate": 1.8756821164605926e-05, |
| "loss": 0.1842, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.20117610646858558, |
| "grad_norm": 14.665600776672363, |
| "learning_rate": 1.8739552393451684e-05, |
| "loss": 0.2097, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.20375528732074694, |
| "grad_norm": 11.763516426086426, |
| "learning_rate": 1.872228362229744e-05, |
| "loss": 0.221, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.2063344681729083, |
| "grad_norm": 3.6280648708343506, |
| "learning_rate": 1.8705014851143195e-05, |
| "loss": 0.1904, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.2063344681729083, |
| "eval_accuracy": 0.929612339119445, |
| "eval_f1": 0.9296143948889304, |
| "eval_loss": 0.18757739663124084, |
| "eval_precision": 0.9297072870129078, |
| "eval_recall": 0.929612339119445, |
| "eval_runtime": 85.1458, |
| "eval_samples_per_second": 910.696, |
| "eval_steps_per_second": 56.926, |
| "step": 8000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 116316, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 2 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.894541453336672e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|