PEFT
Safetensors
crhysc's picture
Upload folder using huggingface_hub
184c355 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0035124532433173,
"eval_steps": 500,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012544475868990056,
"grad_norm": 0.13098102807998657,
"learning_rate": 4.6226415094339625e-05,
"loss": 0.9457,
"step": 50
},
{
"epoch": 0.025088951737980113,
"grad_norm": 0.09060654789209366,
"learning_rate": 4.980675516719879e-05,
"loss": 0.3123,
"step": 100
},
{
"epoch": 0.037633427606970166,
"grad_norm": 0.1386842280626297,
"learning_rate": 4.959670643589313e-05,
"loss": 0.2832,
"step": 150
},
{
"epoch": 0.050177903475960225,
"grad_norm": 0.18612022697925568,
"learning_rate": 4.9386657704587466e-05,
"loss": 0.2708,
"step": 200
},
{
"epoch": 0.06272237934495027,
"grad_norm": 0.17801949381828308,
"learning_rate": 4.91766089732818e-05,
"loss": 0.2662,
"step": 250
},
{
"epoch": 0.07526685521394033,
"grad_norm": 0.10166127979755402,
"learning_rate": 4.896656024197614e-05,
"loss": 0.2599,
"step": 300
},
{
"epoch": 0.08781133108293039,
"grad_norm": 0.21376383304595947,
"learning_rate": 4.8756511510670475e-05,
"loss": 0.2549,
"step": 350
},
{
"epoch": 0.10035580695192045,
"grad_norm": 0.18035802245140076,
"learning_rate": 4.8546462779364816e-05,
"loss": 0.2515,
"step": 400
},
{
"epoch": 0.1129002828209105,
"grad_norm": 0.17301003634929657,
"learning_rate": 4.833641404805915e-05,
"loss": 0.2492,
"step": 450
},
{
"epoch": 0.12544475868990054,
"grad_norm": 0.15648286044597626,
"learning_rate": 4.812636531675349e-05,
"loss": 0.2476,
"step": 500
},
{
"epoch": 0.12544475868990054,
"eval_loss": 0.40483859181404114,
"eval_runtime": 304.1505,
"eval_samples_per_second": 65.034,
"eval_steps_per_second": 1.019,
"step": 500
},
{
"epoch": 0.13798923455889062,
"grad_norm": 0.16616199910640717,
"learning_rate": 4.7916316585447826e-05,
"loss": 0.245,
"step": 550
},
{
"epoch": 0.15053371042788066,
"grad_norm": 0.6291008591651917,
"learning_rate": 4.770626785414216e-05,
"loss": 0.2434,
"step": 600
},
{
"epoch": 0.16307818629687074,
"grad_norm": 0.27643585205078125,
"learning_rate": 4.74962191228365e-05,
"loss": 0.2414,
"step": 650
},
{
"epoch": 0.17562266216586078,
"grad_norm": 0.27261775732040405,
"learning_rate": 4.7286170391530835e-05,
"loss": 0.2407,
"step": 700
},
{
"epoch": 0.18816713803485083,
"grad_norm": 0.1961769014596939,
"learning_rate": 4.7076121660225176e-05,
"loss": 0.2389,
"step": 750
},
{
"epoch": 0.2007116139038409,
"grad_norm": 0.13879896700382233,
"learning_rate": 4.686607292891951e-05,
"loss": 0.237,
"step": 800
},
{
"epoch": 0.21325608977283095,
"grad_norm": 0.20926761627197266,
"learning_rate": 4.665602419761385e-05,
"loss": 0.2372,
"step": 850
},
{
"epoch": 0.225800565641821,
"grad_norm": 0.22533361613750458,
"learning_rate": 4.6445975466308186e-05,
"loss": 0.2351,
"step": 900
},
{
"epoch": 0.23834504151081107,
"grad_norm": 0.1977718323469162,
"learning_rate": 4.623592673500252e-05,
"loss": 0.2348,
"step": 950
},
{
"epoch": 0.2508895173798011,
"grad_norm": 0.26971426606178284,
"learning_rate": 4.602587800369686e-05,
"loss": 0.2334,
"step": 1000
},
{
"epoch": 0.2508895173798011,
"eval_loss": 0.4072725474834442,
"eval_runtime": 304.8492,
"eval_samples_per_second": 64.885,
"eval_steps_per_second": 1.017,
"step": 1000
},
{
"epoch": 0.2634339932487912,
"grad_norm": 0.2769719958305359,
"learning_rate": 4.5815829272391195e-05,
"loss": 0.2334,
"step": 1050
},
{
"epoch": 0.27597846911778123,
"grad_norm": 0.28794065117836,
"learning_rate": 4.5605780541085536e-05,
"loss": 0.2318,
"step": 1100
},
{
"epoch": 0.2885229449867713,
"grad_norm": 0.37538444995880127,
"learning_rate": 4.539573180977987e-05,
"loss": 0.231,
"step": 1150
},
{
"epoch": 0.3010674208557613,
"grad_norm": 0.17134840786457062,
"learning_rate": 4.5185683078474204e-05,
"loss": 0.231,
"step": 1200
},
{
"epoch": 0.31361189672475137,
"grad_norm": 0.28663370013237,
"learning_rate": 4.4975634347168545e-05,
"loss": 0.2294,
"step": 1250
},
{
"epoch": 0.3261563725937415,
"grad_norm": 0.7121312022209167,
"learning_rate": 4.476558561586288e-05,
"loss": 0.2297,
"step": 1300
},
{
"epoch": 0.3387008484627315,
"grad_norm": 0.2550923526287079,
"learning_rate": 4.455553688455722e-05,
"loss": 0.2283,
"step": 1350
},
{
"epoch": 0.35124532433172156,
"grad_norm": 1.0167971849441528,
"learning_rate": 4.4345488153251555e-05,
"loss": 0.2263,
"step": 1400
},
{
"epoch": 0.3637898002007116,
"grad_norm": 0.6048764586448669,
"learning_rate": 4.4135439421945896e-05,
"loss": 0.2277,
"step": 1450
},
{
"epoch": 0.37633427606970166,
"grad_norm": 0.31545552611351013,
"learning_rate": 4.392539069064023e-05,
"loss": 0.2269,
"step": 1500
},
{
"epoch": 0.37633427606970166,
"eval_loss": 0.4088518023490906,
"eval_runtime": 305.124,
"eval_samples_per_second": 64.826,
"eval_steps_per_second": 1.016,
"step": 1500
},
{
"epoch": 0.3888787519386917,
"grad_norm": 0.37552791833877563,
"learning_rate": 4.3715341959334564e-05,
"loss": 0.2246,
"step": 1550
},
{
"epoch": 0.4014232278076818,
"grad_norm": 0.2993505299091339,
"learning_rate": 4.3505293228028905e-05,
"loss": 0.2261,
"step": 1600
},
{
"epoch": 0.41396770367667185,
"grad_norm": 0.15790335834026337,
"learning_rate": 4.329524449672324e-05,
"loss": 0.2251,
"step": 1650
},
{
"epoch": 0.4265121795456619,
"grad_norm": 0.47013625502586365,
"learning_rate": 4.308519576541758e-05,
"loss": 0.2243,
"step": 1700
},
{
"epoch": 0.43905665541465194,
"grad_norm": 0.2053990662097931,
"learning_rate": 4.2875147034111915e-05,
"loss": 0.2237,
"step": 1750
},
{
"epoch": 0.451601131283642,
"grad_norm": 0.17550259828567505,
"learning_rate": 4.2665098302806256e-05,
"loss": 0.2228,
"step": 1800
},
{
"epoch": 0.46414560715263203,
"grad_norm": 0.5729805827140808,
"learning_rate": 4.245504957150059e-05,
"loss": 0.2228,
"step": 1850
},
{
"epoch": 0.47669008302162214,
"grad_norm": 0.3008301854133606,
"learning_rate": 4.2245000840194924e-05,
"loss": 0.2217,
"step": 1900
},
{
"epoch": 0.4892345588906122,
"grad_norm": 0.2061658799648285,
"learning_rate": 4.2034952108889265e-05,
"loss": 0.223,
"step": 1950
},
{
"epoch": 0.5017790347596022,
"grad_norm": 0.2295321226119995,
"learning_rate": 4.18249033775836e-05,
"loss": 0.2219,
"step": 2000
},
{
"epoch": 0.5017790347596022,
"eval_loss": 0.4091717004776001,
"eval_runtime": 304.785,
"eval_samples_per_second": 64.898,
"eval_steps_per_second": 1.017,
"step": 2000
},
{
"epoch": 0.5143235106285923,
"grad_norm": 0.22435450553894043,
"learning_rate": 4.161485464627794e-05,
"loss": 0.2215,
"step": 2050
},
{
"epoch": 0.5268679864975824,
"grad_norm": 0.185350701212883,
"learning_rate": 4.1404805914972275e-05,
"loss": 0.2207,
"step": 2100
},
{
"epoch": 0.5394124623665724,
"grad_norm": 0.46742141246795654,
"learning_rate": 4.119475718366661e-05,
"loss": 0.2197,
"step": 2150
},
{
"epoch": 0.5519569382355625,
"grad_norm": 0.20891498029232025,
"learning_rate": 4.098470845236095e-05,
"loss": 0.2194,
"step": 2200
},
{
"epoch": 0.5645014141045525,
"grad_norm": 0.4283987581729889,
"learning_rate": 4.0774659721055284e-05,
"loss": 0.2192,
"step": 2250
},
{
"epoch": 0.5770458899735426,
"grad_norm": 0.32103636860847473,
"learning_rate": 4.0564610989749625e-05,
"loss": 0.2185,
"step": 2300
},
{
"epoch": 0.5895903658425327,
"grad_norm": 0.20490871369838715,
"learning_rate": 4.035456225844396e-05,
"loss": 0.2183,
"step": 2350
},
{
"epoch": 0.6021348417115227,
"grad_norm": 0.3914024233818054,
"learning_rate": 4.01445135271383e-05,
"loss": 0.2184,
"step": 2400
},
{
"epoch": 0.6146793175805128,
"grad_norm": 0.18293343484401703,
"learning_rate": 3.9934464795832635e-05,
"loss": 0.2186,
"step": 2450
},
{
"epoch": 0.6272237934495027,
"grad_norm": 0.20402023196220398,
"learning_rate": 3.972441606452697e-05,
"loss": 0.2179,
"step": 2500
},
{
"epoch": 0.6272237934495027,
"eval_loss": 0.40875929594039917,
"eval_runtime": 304.5578,
"eval_samples_per_second": 64.947,
"eval_steps_per_second": 1.018,
"step": 2500
},
{
"epoch": 0.6397682693184928,
"grad_norm": 0.48965466022491455,
"learning_rate": 3.951436733322131e-05,
"loss": 0.2168,
"step": 2550
},
{
"epoch": 0.652312745187483,
"grad_norm": 0.5581162571907043,
"learning_rate": 3.9304318601915644e-05,
"loss": 0.2175,
"step": 2600
},
{
"epoch": 0.6648572210564729,
"grad_norm": 0.23750029504299164,
"learning_rate": 3.9094269870609985e-05,
"loss": 0.217,
"step": 2650
},
{
"epoch": 0.677401696925463,
"grad_norm": 0.5061260461807251,
"learning_rate": 3.888422113930432e-05,
"loss": 0.2151,
"step": 2700
},
{
"epoch": 0.689946172794453,
"grad_norm": 0.1854904741048813,
"learning_rate": 3.867417240799866e-05,
"loss": 0.216,
"step": 2750
},
{
"epoch": 0.7024906486634431,
"grad_norm": 0.22555580735206604,
"learning_rate": 3.8464123676692995e-05,
"loss": 0.2157,
"step": 2800
},
{
"epoch": 0.7150351245324331,
"grad_norm": 0.4870660901069641,
"learning_rate": 3.825407494538733e-05,
"loss": 0.2151,
"step": 2850
},
{
"epoch": 0.7275796004014232,
"grad_norm": 0.37115806341171265,
"learning_rate": 3.804402621408167e-05,
"loss": 0.2146,
"step": 2900
},
{
"epoch": 0.7401240762704133,
"grad_norm": 0.34767332673072815,
"learning_rate": 3.7833977482776004e-05,
"loss": 0.2139,
"step": 2950
},
{
"epoch": 0.7526685521394033,
"grad_norm": 0.2617909610271454,
"learning_rate": 3.7623928751470345e-05,
"loss": 0.2149,
"step": 3000
},
{
"epoch": 0.7526685521394033,
"eval_loss": 0.40570953488349915,
"eval_runtime": 304.7965,
"eval_samples_per_second": 64.896,
"eval_steps_per_second": 1.017,
"step": 3000
},
{
"epoch": 0.7652130280083934,
"grad_norm": 0.6052380204200745,
"learning_rate": 3.741388002016468e-05,
"loss": 0.2141,
"step": 3050
},
{
"epoch": 0.7777575038773834,
"grad_norm": 0.3745960295200348,
"learning_rate": 3.7203831288859014e-05,
"loss": 0.213,
"step": 3100
},
{
"epoch": 0.7903019797463735,
"grad_norm": 0.24974456429481506,
"learning_rate": 3.6993782557553355e-05,
"loss": 0.2142,
"step": 3150
},
{
"epoch": 0.8028464556153636,
"grad_norm": 0.4550504684448242,
"learning_rate": 3.678373382624769e-05,
"loss": 0.2133,
"step": 3200
},
{
"epoch": 0.8153909314843536,
"grad_norm": 0.8576037287712097,
"learning_rate": 3.657368509494203e-05,
"loss": 0.2121,
"step": 3250
},
{
"epoch": 0.8279354073533437,
"grad_norm": 0.4864007532596588,
"learning_rate": 3.6363636363636364e-05,
"loss": 0.2131,
"step": 3300
},
{
"epoch": 0.8404798832223337,
"grad_norm": 0.6007568836212158,
"learning_rate": 3.6153587632330705e-05,
"loss": 0.2134,
"step": 3350
},
{
"epoch": 0.8530243590913238,
"grad_norm": 0.2667822241783142,
"learning_rate": 3.594353890102504e-05,
"loss": 0.2123,
"step": 3400
},
{
"epoch": 0.8655688349603139,
"grad_norm": 0.16192808747291565,
"learning_rate": 3.5733490169719374e-05,
"loss": 0.2102,
"step": 3450
},
{
"epoch": 0.8781133108293039,
"grad_norm": 0.4632836580276489,
"learning_rate": 3.5523441438413715e-05,
"loss": 0.2127,
"step": 3500
},
{
"epoch": 0.8781133108293039,
"eval_loss": 0.40804293751716614,
"eval_runtime": 305.2396,
"eval_samples_per_second": 64.802,
"eval_steps_per_second": 1.016,
"step": 3500
},
{
"epoch": 0.890657786698294,
"grad_norm": 0.1812131106853485,
"learning_rate": 3.531339270710805e-05,
"loss": 0.2124,
"step": 3550
},
{
"epoch": 0.903202262567284,
"grad_norm": 0.29924267530441284,
"learning_rate": 3.510334397580239e-05,
"loss": 0.2116,
"step": 3600
},
{
"epoch": 0.9157467384362741,
"grad_norm": 0.30432143807411194,
"learning_rate": 3.4893295244496724e-05,
"loss": 0.2096,
"step": 3650
},
{
"epoch": 0.9282912143052641,
"grad_norm": 0.17945361137390137,
"learning_rate": 3.4683246513191065e-05,
"loss": 0.211,
"step": 3700
},
{
"epoch": 0.9408356901742542,
"grad_norm": 0.2670902907848358,
"learning_rate": 3.44731977818854e-05,
"loss": 0.2118,
"step": 3750
},
{
"epoch": 0.9533801660432443,
"grad_norm": 0.350669801235199,
"learning_rate": 3.4263149050579734e-05,
"loss": 0.2094,
"step": 3800
},
{
"epoch": 0.9659246419122343,
"grad_norm": 0.3146061599254608,
"learning_rate": 3.4053100319274075e-05,
"loss": 0.2092,
"step": 3850
},
{
"epoch": 0.9784691177812244,
"grad_norm": 0.16551902890205383,
"learning_rate": 3.384305158796841e-05,
"loss": 0.2103,
"step": 3900
},
{
"epoch": 0.9910135936502144,
"grad_norm": 0.34425023198127747,
"learning_rate": 3.363300285666275e-05,
"loss": 0.2095,
"step": 3950
},
{
"epoch": 1.0035124532433173,
"grad_norm": 0.44446665048599243,
"learning_rate": 3.3422954125357084e-05,
"loss": 0.2098,
"step": 4000
},
{
"epoch": 1.0035124532433173,
"eval_loss": 0.40903258323669434,
"eval_runtime": 305.036,
"eval_samples_per_second": 64.845,
"eval_steps_per_second": 1.016,
"step": 4000
}
],
"logging_steps": 50,
"max_steps": 11955,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.237451667319271e+20,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}