Instructions to use bimabk/test_env_baru with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
  - PEFT
    How to use bimabk/test_env_baru with PEFT:
    Base model is not found.
- Notebooks
  - Google Colab
  - Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.47357293868921774, | |
| "eval_steps": 500, | |
| "global_step": 448, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005285412262156448, | |
| "grad_norm": 8.903882026672363, | |
| "learning_rate": 9.166551619047618e-06, | |
| "loss": 1.2181, | |
| "num_tokens": 152231.0, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.010570824524312896, | |
| "grad_norm": 1.0083556175231934, | |
| "learning_rate": 2.062474114285714e-05, | |
| "loss": 0.6681, | |
| "num_tokens": 317223.0, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.015856236786469344, | |
| "grad_norm": 0.746456503868103, | |
| "learning_rate": 3.2082930666666666e-05, | |
| "loss": 0.4249, | |
| "num_tokens": 473767.0, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.021141649048625793, | |
| "grad_norm": 0.6790260076522827, | |
| "learning_rate": 4.3541120190476185e-05, | |
| "loss": 0.3982, | |
| "num_tokens": 659649.0, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.026427061310782242, | |
| "grad_norm": 0.7228266596794128, | |
| "learning_rate": 5.499930971428571e-05, | |
| "loss": 0.3887, | |
| "num_tokens": 832364.0, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03171247357293869, | |
| "grad_norm": 0.6723021268844604, | |
| "learning_rate": 6.645749923809523e-05, | |
| "loss": 0.3467, | |
| "num_tokens": 991414.0, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03699788583509514, | |
| "grad_norm": 0.5942872166633606, | |
| "learning_rate": 7.791568876190476e-05, | |
| "loss": 0.3578, | |
| "num_tokens": 1158859.0, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.042283298097251586, | |
| "grad_norm": 0.6299146413803101, | |
| "learning_rate": 8.020446518214365e-05, | |
| "loss": 0.3567, | |
| "num_tokens": 1312426.0, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04756871035940803, | |
| "grad_norm": 0.43629753589630127, | |
| "learning_rate": 8.0192841334398e-05, | |
| "loss": 0.3251, | |
| "num_tokens": 1488799.0, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.052854122621564484, | |
| "grad_norm": 0.7118616700172424, | |
| "learning_rate": 8.017227973373715e-05, | |
| "loss": 0.3148, | |
| "num_tokens": 1659228.0, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05813953488372093, | |
| "grad_norm": 0.4712906777858734, | |
| "learning_rate": 8.014278649308742e-05, | |
| "loss": 0.3528, | |
| "num_tokens": 1824722.0, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.06342494714587738, | |
| "grad_norm": 0.5827367305755615, | |
| "learning_rate": 8.010437038073538e-05, | |
| "loss": 0.3307, | |
| "num_tokens": 1999570.0, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06871035940803383, | |
| "grad_norm": 0.46786293387413025, | |
| "learning_rate": 8.005704281772099e-05, | |
| "loss": 0.3722, | |
| "num_tokens": 2149278.0, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.07399577167019028, | |
| "grad_norm": 0.46910208463668823, | |
| "learning_rate": 8.000081787444232e-05, | |
| "loss": 0.3281, | |
| "num_tokens": 2321362.0, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07928118393234672, | |
| "grad_norm": 0.46508923172950745, | |
| "learning_rate": 7.993571226647224e-05, | |
| "loss": 0.31, | |
| "num_tokens": 2513422.0, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.08456659619450317, | |
| "grad_norm": 0.3090206980705261, | |
| "learning_rate": 7.98617453495891e-05, | |
| "loss": 0.3267, | |
| "num_tokens": 2682030.0, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08985200845665962, | |
| "grad_norm": 0.4037857949733734, | |
| "learning_rate": 7.977893911402208e-05, | |
| "loss": 0.3497, | |
| "num_tokens": 2838889.0, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.09513742071881606, | |
| "grad_norm": 0.3317049443721771, | |
| "learning_rate": 7.968731817791378e-05, | |
| "loss": 0.4027, | |
| "num_tokens": 2984527.0, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.10042283298097252, | |
| "grad_norm": 0.4948605000972748, | |
| "learning_rate": 7.958690978000108e-05, | |
| "loss": 0.3301, | |
| "num_tokens": 3129850.0, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.10570824524312897, | |
| "grad_norm": 0.5033330917358398, | |
| "learning_rate": 7.947774377151723e-05, | |
| "loss": 0.3574, | |
| "num_tokens": 3289081.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1109936575052854, | |
| "grad_norm": 0.6163113117218018, | |
| "learning_rate": 7.935985260731712e-05, | |
| "loss": 0.3465, | |
| "num_tokens": 3456305.0, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.11627906976744186, | |
| "grad_norm": 0.3303355574607849, | |
| "learning_rate": 7.923327133622843e-05, | |
| "loss": 0.3013, | |
| "num_tokens": 3632845.0, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12156448202959831, | |
| "grad_norm": 0.3407858908176422, | |
| "learning_rate": 7.909803759063184e-05, | |
| "loss": 0.3059, | |
| "num_tokens": 3829460.0, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.12684989429175475, | |
| "grad_norm": 0.39908474683761597, | |
| "learning_rate": 7.895419157527279e-05, | |
| "loss": 0.3627, | |
| "num_tokens": 3992992.0, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1321353065539112, | |
| "grad_norm": 0.29309067130088806, | |
| "learning_rate": 7.880177605530884e-05, | |
| "loss": 0.2892, | |
| "num_tokens": 4164780.0, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.13742071881606766, | |
| "grad_norm": 0.37892845273017883, | |
| "learning_rate": 7.864083634359562e-05, | |
| "loss": 0.3028, | |
| "num_tokens": 4322872.0, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1427061310782241, | |
| "grad_norm": 0.34533941745758057, | |
| "learning_rate": 7.847142028721538e-05, | |
| "loss": 0.3528, | |
| "num_tokens": 4484774.0, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.14799154334038056, | |
| "grad_norm": 0.47042974829673767, | |
| "learning_rate": 7.829357825325212e-05, | |
| "loss": 0.3279, | |
| "num_tokens": 4661609.0, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.15327695560253699, | |
| "grad_norm": 0.3045842945575714, | |
| "learning_rate": 7.810736311381762e-05, | |
| "loss": 0.3445, | |
| "num_tokens": 4827667.0, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.15856236786469344, | |
| "grad_norm": 0.4161529541015625, | |
| "learning_rate": 7.791283023033264e-05, | |
| "loss": 0.3148, | |
| "num_tokens": 5000163.0, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1638477801268499, | |
| "grad_norm": 0.4866187274456024, | |
| "learning_rate": 7.771003743706797e-05, | |
| "loss": 0.3225, | |
| "num_tokens": 5168441.0, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.16913319238900634, | |
| "grad_norm": 0.35074010491371155, | |
| "learning_rate": 7.749904502395058e-05, | |
| "loss": 0.3089, | |
| "num_tokens": 5338454.0, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1744186046511628, | |
| "grad_norm": 0.34164947271347046, | |
| "learning_rate": 7.727991571863935e-05, | |
| "loss": 0.3025, | |
| "num_tokens": 5512939.0, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.17970401691331925, | |
| "grad_norm": 0.43914279341697693, | |
| "learning_rate": 7.705271466787641e-05, | |
| "loss": 0.3334, | |
| "num_tokens": 5665993.0, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1849894291754757, | |
| "grad_norm": 0.405638724565506, | |
| "learning_rate": 7.681750941811905e-05, | |
| "loss": 0.3739, | |
| "num_tokens": 5824925.0, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.19027484143763213, | |
| "grad_norm": 0.42802131175994873, | |
| "learning_rate": 7.657436989545827e-05, | |
| "loss": 0.3284, | |
| "num_tokens": 5982641.0, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.19556025369978858, | |
| "grad_norm": 0.41498205065727234, | |
| "learning_rate": 7.632336838482996e-05, | |
| "loss": 0.3022, | |
| "num_tokens": 6149598.0, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.20084566596194503, | |
| "grad_norm": 0.40431639552116394, | |
| "learning_rate": 7.60645795085246e-05, | |
| "loss": 0.3159, | |
| "num_tokens": 6320515.0, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.20613107822410148, | |
| "grad_norm": 0.32176268100738525, | |
| "learning_rate": 7.579808020400232e-05, | |
| "loss": 0.2938, | |
| "num_tokens": 6492543.0, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.21141649048625794, | |
| "grad_norm": 0.4644106924533844, | |
| "learning_rate": 7.55239497010194e-05, | |
| "loss": 0.3349, | |
| "num_tokens": 6672523.0, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2167019027484144, | |
| "grad_norm": 0.35432812571525574, | |
| "learning_rate": 7.52422694980736e-05, | |
| "loss": 0.3041, | |
| "num_tokens": 6844107.0, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.2219873150105708, | |
| "grad_norm": 0.44315239787101746, | |
| "learning_rate": 7.495312333817455e-05, | |
| "loss": 0.2612, | |
| "num_tokens": 7007468.0, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.22727272727272727, | |
| "grad_norm": 0.36817148327827454, | |
| "learning_rate": 7.465659718394734e-05, | |
| "loss": 0.3221, | |
| "num_tokens": 7156859.0, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.23255813953488372, | |
| "grad_norm": 0.39306601881980896, | |
| "learning_rate": 7.43527791920758e-05, | |
| "loss": 0.3426, | |
| "num_tokens": 7311920.0, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.23784355179704017, | |
| "grad_norm": 0.39284104108810425, | |
| "learning_rate": 7.404175968709388e-05, | |
| "loss": 0.3232, | |
| "num_tokens": 7461910.0, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.24312896405919662, | |
| "grad_norm": 0.3719322085380554, | |
| "learning_rate": 7.372363113453213e-05, | |
| "loss": 0.3277, | |
| "num_tokens": 7629200.0, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.24841437632135308, | |
| "grad_norm": 0.3720446527004242, | |
| "learning_rate": 7.339848811342796e-05, | |
| "loss": 0.3122, | |
| "num_tokens": 7790842.0, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.2536997885835095, | |
| "grad_norm": 0.34994134306907654, | |
| "learning_rate": 7.306642728820755e-05, | |
| "loss": 0.3404, | |
| "num_tokens": 7961093.0, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.25898520084566595, | |
| "grad_norm": 0.379666268825531, | |
| "learning_rate": 7.272754737994752e-05, | |
| "loss": 0.3254, | |
| "num_tokens": 8118785.0, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.2642706131078224, | |
| "grad_norm": 0.5034386515617371, | |
| "learning_rate": 7.238194913702544e-05, | |
| "loss": 0.3185, | |
| "num_tokens": 8279200.0, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.26955602536997886, | |
| "grad_norm": 0.38826555013656616, | |
| "learning_rate": 7.202973530516749e-05, | |
| "loss": 0.3021, | |
| "num_tokens": 8487897.0, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.2748414376321353, | |
| "grad_norm": 0.3884974420070648, | |
| "learning_rate": 7.167101059690238e-05, | |
| "loss": 0.3221, | |
| "num_tokens": 8636638.0, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.28012684989429176, | |
| "grad_norm": 0.4962138533592224, | |
| "learning_rate": 7.130588166043048e-05, | |
| "loss": 0.3485, | |
| "num_tokens": 8798198.0, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.2854122621564482, | |
| "grad_norm": 0.4131626486778259, | |
| "learning_rate": 7.093445704791747e-05, | |
| "loss": 0.2897, | |
| "num_tokens": 8990965.0, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.29069767441860467, | |
| "grad_norm": 0.38657164573669434, | |
| "learning_rate": 7.055684718322205e-05, | |
| "loss": 0.3492, | |
| "num_tokens": 9152215.0, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.2959830866807611, | |
| "grad_norm": 0.2804111838340759, | |
| "learning_rate": 7.017316432906707e-05, | |
| "loss": 0.3243, | |
| "num_tokens": 9311537.0, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3012684989429176, | |
| "grad_norm": 0.3840746283531189, | |
| "learning_rate": 6.978352255366406e-05, | |
| "loss": 0.3657, | |
| "num_tokens": 9446553.0, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.30655391120507397, | |
| "grad_norm": 0.29615920782089233, | |
| "learning_rate": 6.938803769680094e-05, | |
| "loss": 0.3144, | |
| "num_tokens": 9625894.0, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.3118393234672304, | |
| "grad_norm": 0.3887929320335388, | |
| "learning_rate": 6.898682733540313e-05, | |
| "loss": 0.2967, | |
| "num_tokens": 9820246.0, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.3171247357293869, | |
| "grad_norm": 0.4418380856513977, | |
| "learning_rate": 6.8580010748578e-05, | |
| "loss": 0.3299, | |
| "num_tokens": 9995021.0, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3224101479915433, | |
| "grad_norm": 0.327908992767334, | |
| "learning_rate": 6.816770888215352e-05, | |
| "loss": 0.2879, | |
| "num_tokens": 10186048.0, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.3276955602536998, | |
| "grad_norm": 0.31937530636787415, | |
| "learning_rate": 6.775004431272132e-05, | |
| "loss": 0.3504, | |
| "num_tokens": 10343442.0, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.33298097251585623, | |
| "grad_norm": 0.3883694112300873, | |
| "learning_rate": 6.732714121119478e-05, | |
| "loss": 0.3188, | |
| "num_tokens": 10505725.0, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.3382663847780127, | |
| "grad_norm": 0.520715594291687, | |
| "learning_rate": 6.68991253058933e-05, | |
| "loss": 0.3348, | |
| "num_tokens": 10685454.0, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.34355179704016914, | |
| "grad_norm": 0.3867979943752289, | |
| "learning_rate": 6.646612384516355e-05, | |
| "loss": 0.2958, | |
| "num_tokens": 10839868.0, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.3488372093023256, | |
| "grad_norm": 0.4362066686153412, | |
| "learning_rate": 6.602826555954866e-05, | |
| "loss": 0.3052, | |
| "num_tokens": 10987360.0, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.35412262156448204, | |
| "grad_norm": 0.3443554639816284, | |
| "learning_rate": 6.558568062351694e-05, | |
| "loss": 0.3133, | |
| "num_tokens": 11147325.0, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.3594080338266385, | |
| "grad_norm": 0.3375532329082489, | |
| "learning_rate": 6.513850061676129e-05, | |
| "loss": 0.3187, | |
| "num_tokens": 11328945.0, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.36469344608879495, | |
| "grad_norm": 0.48123109340667725, | |
| "learning_rate": 6.468685848508066e-05, | |
| "loss": 0.3234, | |
| "num_tokens": 11491151.0, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.3699788583509514, | |
| "grad_norm": 0.350399911403656, | |
| "learning_rate": 6.423088850085563e-05, | |
| "loss": 0.2754, | |
| "num_tokens": 11659065.0, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3752642706131078, | |
| "grad_norm": 0.3584173917770386, | |
| "learning_rate": 6.377072622312942e-05, | |
| "loss": 0.3336, | |
| "num_tokens": 11863689.0, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.38054968287526425, | |
| "grad_norm": 0.3181978464126587, | |
| "learning_rate": 6.330650845730648e-05, | |
| "loss": 0.3418, | |
| "num_tokens": 12006544.0, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3858350951374207, | |
| "grad_norm": 0.30926552414894104, | |
| "learning_rate": 6.283837321448044e-05, | |
| "loss": 0.3248, | |
| "num_tokens": 12155215.0, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.39112050739957716, | |
| "grad_norm": 0.30399248003959656, | |
| "learning_rate": 6.236645967040363e-05, | |
| "loss": 0.3199, | |
| "num_tokens": 12304714.0, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.3964059196617336, | |
| "grad_norm": 0.3858378827571869, | |
| "learning_rate": 6.189090812411056e-05, | |
| "loss": 0.2905, | |
| "num_tokens": 12467243.0, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.40169133192389006, | |
| "grad_norm": 0.3069511950016022, | |
| "learning_rate": 6.141185995620703e-05, | |
| "loss": 0.3314, | |
| "num_tokens": 12641866.0, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4069767441860465, | |
| "grad_norm": 0.31766510009765625, | |
| "learning_rate": 6.0929457586838143e-05, | |
| "loss": 0.3084, | |
| "num_tokens": 12805214.0, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.41226215644820297, | |
| "grad_norm": 0.5858482122421265, | |
| "learning_rate": 6.044384443334691e-05, | |
| "loss": 0.2878, | |
| "num_tokens": 12967228.0, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.4175475687103594, | |
| "grad_norm": 0.34514257311820984, | |
| "learning_rate": 5.9955164867636644e-05, | |
| "loss": 0.3471, | |
| "num_tokens": 13104829.0, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.42283298097251587, | |
| "grad_norm": 0.4240577816963196, | |
| "learning_rate": 5.9463564173249374e-05, | |
| "loss": 0.2973, | |
| "num_tokens": 13288474.0, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4281183932346723, | |
| "grad_norm": 0.3454580008983612, | |
| "learning_rate": 5.896918850217336e-05, | |
| "loss": 0.3241, | |
| "num_tokens": 13451876.0, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.4334038054968288, | |
| "grad_norm": 0.3258069157600403, | |
| "learning_rate": 5.8472184831392364e-05, | |
| "loss": 0.2968, | |
| "num_tokens": 13633506.0, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.43868921775898523, | |
| "grad_norm": 0.35886386036872864, | |
| "learning_rate": 5.797270091918965e-05, | |
| "loss": 0.3173, | |
| "num_tokens": 13794654.0, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.4439746300211416, | |
| "grad_norm": 0.5046164393424988, | |
| "learning_rate": 5.747088526121975e-05, | |
| "loss": 0.3439, | |
| "num_tokens": 13928763.0, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.4492600422832981, | |
| "grad_norm": 0.34629306197166443, | |
| "learning_rate": 5.696688704636091e-05, | |
| "loss": 0.3095, | |
| "num_tokens": 14091776.0, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.45454545454545453, | |
| "grad_norm": 0.35323911905288696, | |
| "learning_rate": 5.6460856112361576e-05, | |
| "loss": 0.3053, | |
| "num_tokens": 14283410.0, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.459830866807611, | |
| "grad_norm": 0.3977009356021881, | |
| "learning_rate": 5.595294290129389e-05, | |
| "loss": 0.2885, | |
| "num_tokens": 14450115.0, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.46511627906976744, | |
| "grad_norm": 0.34842291474342346, | |
| "learning_rate": 5.5443298414827514e-05, | |
| "loss": 0.2904, | |
| "num_tokens": 14629092.0, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.4704016913319239, | |
| "grad_norm": 0.3181321322917938, | |
| "learning_rate": 5.4932074169337124e-05, | |
| "loss": 0.3289, | |
| "num_tokens": 14766334.0, | |
| "step": 445 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 946, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.8225814430631854e+18, | |
| "train_batch_size": 14, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |