Text Generation
PEFT
Safetensors
English
lora
qwen2.5
qwen2.5-coder
code
reasoning
pedagogy
fine-tuned
conversational
Instructions to use mechramc/codek-qwen2.5-coder-7b-lora-v2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use mechramc/codek-qwen2.5-coder-7b-lora-v2 with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("unsloth/Qwen2.5-Coder-7B-Instruct")
model = PeftModel.from_pretrained(base_model, "mechramc/codek-qwen2.5-coder-7b-lora-v2")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.276707530647986, | |
| "eval_steps": 100, | |
| "global_step": 1300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.017513134851138354, | |
| "grad_norm": 0.4135197103023529, | |
| "learning_rate": 3.6e-05, | |
| "loss": 0.8109177589416504, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03502626970227671, | |
| "grad_norm": 0.5954136252403259, | |
| "learning_rate": 7.6e-05, | |
| "loss": 0.6212304115295411, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05253940455341506, | |
| "grad_norm": 0.4027167856693268, | |
| "learning_rate": 0.000116, | |
| "loss": 0.44783411026000974, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07005253940455342, | |
| "grad_norm": 0.47371360659599304, | |
| "learning_rate": 0.00015600000000000002, | |
| "loss": 0.3630207538604736, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08756567425569177, | |
| "grad_norm": 0.48840901255607605, | |
| "learning_rate": 0.000196, | |
| "loss": 0.32424685955047605, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.10507880910683012, | |
| "grad_norm": 0.5532234311103821, | |
| "learning_rate": 0.0001989176187612748, | |
| "loss": 0.2953991413116455, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.12259194395796848, | |
| "grad_norm": 0.5430059432983398, | |
| "learning_rate": 0.00019771497294046903, | |
| "loss": 0.26429708003997804, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.14010507880910683, | |
| "grad_norm": 0.5477070212364197, | |
| "learning_rate": 0.00019651232711966328, | |
| "loss": 0.2550451040267944, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.15761821366024517, | |
| "grad_norm": 0.37017086148262024, | |
| "learning_rate": 0.00019530968129885748, | |
| "loss": 0.23371753692626954, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.17513134851138354, | |
| "grad_norm": 0.38276150822639465, | |
| "learning_rate": 0.0001941070354780517, | |
| "loss": 0.2195589542388916, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.17513134851138354, | |
| "eval_loss": 0.23231205344200134, | |
| "eval_runtime": 169.8531, | |
| "eval_samples_per_second": 2.991, | |
| "eval_steps_per_second": 0.748, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.19264448336252188, | |
| "grad_norm": 0.406323105096817, | |
| "learning_rate": 0.00019290438965724596, | |
| "loss": 0.2108442783355713, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.21015761821366025, | |
| "grad_norm": 0.47465822100639343, | |
| "learning_rate": 0.00019170174383644018, | |
| "loss": 0.2249575138092041, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2276707530647986, | |
| "grad_norm": 0.35268914699554443, | |
| "learning_rate": 0.0001904990980156344, | |
| "loss": 0.16998076438903809, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.24518388791593695, | |
| "grad_norm": 0.31479501724243164, | |
| "learning_rate": 0.00018929645219482863, | |
| "loss": 0.1623205780982971, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2626970227670753, | |
| "grad_norm": 0.3868594467639923, | |
| "learning_rate": 0.00018809380637402286, | |
| "loss": 0.16868008375167848, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.28021015761821366, | |
| "grad_norm": 0.4887761175632477, | |
| "learning_rate": 0.00018689116055321708, | |
| "loss": 0.1882340431213379, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.29772329246935203, | |
| "grad_norm": 0.39412927627563477, | |
| "learning_rate": 0.0001856885147324113, | |
| "loss": 0.15920686721801758, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.31523642732049034, | |
| "grad_norm": 0.41622865200042725, | |
| "learning_rate": 0.00018448586891160553, | |
| "loss": 0.16607775688171386, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3327495621716287, | |
| "grad_norm": 0.4045696258544922, | |
| "learning_rate": 0.00018328322309079978, | |
| "loss": 0.158127498626709, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.3502626970227671, | |
| "grad_norm": 0.3789847493171692, | |
| "learning_rate": 0.00018208057726999398, | |
| "loss": 0.14632443189620972, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3502626970227671, | |
| "eval_loss": 0.1353635936975479, | |
| "eval_runtime": 171.8534, | |
| "eval_samples_per_second": 2.956, | |
| "eval_steps_per_second": 0.739, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.36777583187390545, | |
| "grad_norm": 0.41194388270378113, | |
| "learning_rate": 0.00018087793144918823, | |
| "loss": 0.1293831706047058, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.38528896672504376, | |
| "grad_norm": 0.35434651374816895, | |
| "learning_rate": 0.00017967528562838245, | |
| "loss": 0.13147668838500975, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4028021015761821, | |
| "grad_norm": 0.3050230145454407, | |
| "learning_rate": 0.00017847263980757668, | |
| "loss": 0.12810969352722168, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.4203152364273205, | |
| "grad_norm": 0.29852065443992615, | |
| "learning_rate": 0.0001772699939867709, | |
| "loss": 0.13389307260513306, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.43782837127845886, | |
| "grad_norm": 0.3992239832878113, | |
| "learning_rate": 0.00017606734816596513, | |
| "loss": 0.11474900245666504, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4553415061295972, | |
| "grad_norm": 0.323345422744751, | |
| "learning_rate": 0.00017486470234515935, | |
| "loss": 0.11180757284164429, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.47285464098073554, | |
| "grad_norm": 0.3820851147174835, | |
| "learning_rate": 0.00017366205652435358, | |
| "loss": 0.10637552738189697, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.4903677758318739, | |
| "grad_norm": 0.3785695433616638, | |
| "learning_rate": 0.0001724594107035478, | |
| "loss": 0.11243565082550049, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5078809106830122, | |
| "grad_norm": 0.34767481684684753, | |
| "learning_rate": 0.00017125676488274205, | |
| "loss": 0.11057982444763184, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5253940455341506, | |
| "grad_norm": 0.32242536544799805, | |
| "learning_rate": 0.00017005411906193628, | |
| "loss": 0.09878214001655579, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5253940455341506, | |
| "eval_loss": 0.10276732593774796, | |
| "eval_runtime": 170.0789, | |
| "eval_samples_per_second": 2.987, | |
| "eval_steps_per_second": 0.747, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.542907180385289, | |
| "grad_norm": 0.3188435435295105, | |
| "learning_rate": 0.00016885147324113047, | |
| "loss": 0.08771577477455139, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5604203152364273, | |
| "grad_norm": 0.2941615879535675, | |
| "learning_rate": 0.00016764882742032473, | |
| "loss": 0.08557047247886658, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.5779334500875657, | |
| "grad_norm": 0.2936120927333832, | |
| "learning_rate": 0.00016644618159951895, | |
| "loss": 0.08636216521263122, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5954465849387041, | |
| "grad_norm": 0.21349965035915375, | |
| "learning_rate": 0.0001652435357787132, | |
| "loss": 0.08149101734161376, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6129597197898424, | |
| "grad_norm": 0.2442740797996521, | |
| "learning_rate": 0.0001640408899579074, | |
| "loss": 0.08436259627342224, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6304728546409807, | |
| "grad_norm": 0.3144635856151581, | |
| "learning_rate": 0.00016283824413710162, | |
| "loss": 0.0912843644618988, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.647985989492119, | |
| "grad_norm": 0.18774041533470154, | |
| "learning_rate": 0.00016163559831629587, | |
| "loss": 0.08484984040260315, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.6654991243432574, | |
| "grad_norm": 0.3200187385082245, | |
| "learning_rate": 0.0001604329524954901, | |
| "loss": 0.08420997262001037, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.6830122591943958, | |
| "grad_norm": 0.20744681358337402, | |
| "learning_rate": 0.0001592303066746843, | |
| "loss": 0.07883568406105042, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7005253940455342, | |
| "grad_norm": 0.49990326166152954, | |
| "learning_rate": 0.00015802766085387855, | |
| "loss": 0.07491461634635925, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7005253940455342, | |
| "eval_loss": 0.08543122559785843, | |
| "eval_runtime": 169.5964, | |
| "eval_samples_per_second": 2.995, | |
| "eval_steps_per_second": 0.749, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7180385288966725, | |
| "grad_norm": 0.21963991224765778, | |
| "learning_rate": 0.00015682501503307277, | |
| "loss": 0.07940490245819092, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7355516637478109, | |
| "grad_norm": 0.282270610332489, | |
| "learning_rate": 0.000155622369212267, | |
| "loss": 0.08389427065849304, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.7530647985989493, | |
| "grad_norm": 0.19522342085838318, | |
| "learning_rate": 0.00015441972339146122, | |
| "loss": 0.07796943187713623, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.7705779334500875, | |
| "grad_norm": 0.20144295692443848, | |
| "learning_rate": 0.00015321707757065545, | |
| "loss": 0.08569519519805908, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.7880910683012259, | |
| "grad_norm": 0.31299343705177307, | |
| "learning_rate": 0.0001520144317498497, | |
| "loss": 0.07234247326850891, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8056042031523643, | |
| "grad_norm": 0.22233198583126068, | |
| "learning_rate": 0.0001508117859290439, | |
| "loss": 0.06918607354164123, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.8231173380035026, | |
| "grad_norm": 0.3281087577342987, | |
| "learning_rate": 0.00014960914010823812, | |
| "loss": 0.06424351334571839, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.840630472854641, | |
| "grad_norm": 0.23634330928325653, | |
| "learning_rate": 0.00014840649428743237, | |
| "loss": 0.07089964151382447, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.8581436077057794, | |
| "grad_norm": 0.24085308611392975, | |
| "learning_rate": 0.0001472038484666266, | |
| "loss": 0.07725317478179931, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.8756567425569177, | |
| "grad_norm": 0.2506239712238312, | |
| "learning_rate": 0.00014600120264582082, | |
| "loss": 0.07955536246299744, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8756567425569177, | |
| "eval_loss": 0.07601634413003922, | |
| "eval_runtime": 170.4186, | |
| "eval_samples_per_second": 2.981, | |
| "eval_steps_per_second": 0.745, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8931698774080561, | |
| "grad_norm": 0.30001509189605713, | |
| "learning_rate": 0.00014479855682501504, | |
| "loss": 0.06071768999099732, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.9106830122591943, | |
| "grad_norm": 0.1644354909658432, | |
| "learning_rate": 0.00014359591100420927, | |
| "loss": 0.07156956791877747, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.9281961471103327, | |
| "grad_norm": 0.2289579063653946, | |
| "learning_rate": 0.0001423932651834035, | |
| "loss": 0.07050368785858155, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.9457092819614711, | |
| "grad_norm": 0.3195700943470001, | |
| "learning_rate": 0.00014119061936259772, | |
| "loss": 0.06230233311653137, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.9632224168126094, | |
| "grad_norm": 0.15884605050086975, | |
| "learning_rate": 0.00013998797354179194, | |
| "loss": 0.06492781639099121, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.9807355516637478, | |
| "grad_norm": 0.17338015139102936, | |
| "learning_rate": 0.0001387853277209862, | |
| "loss": 0.07274928689002991, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.9982486865148862, | |
| "grad_norm": 0.18797871470451355, | |
| "learning_rate": 0.0001375826819001804, | |
| "loss": 0.07553291320800781, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.0157618213660244, | |
| "grad_norm": 0.14001163840293884, | |
| "learning_rate": 0.00013638003607937464, | |
| "loss": 0.04513072073459625, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.0332749562171628, | |
| "grad_norm": 0.25820890069007874, | |
| "learning_rate": 0.00013517739025856887, | |
| "loss": 0.05151134729385376, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.0507880910683012, | |
| "grad_norm": 0.2387373149394989, | |
| "learning_rate": 0.0001339747444377631, | |
| "loss": 0.05233837962150574, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.0507880910683012, | |
| "eval_loss": 0.07496609538793564, | |
| "eval_runtime": 169.9129, | |
| "eval_samples_per_second": 2.99, | |
| "eval_steps_per_second": 0.747, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.0683012259194395, | |
| "grad_norm": 0.21280422806739807, | |
| "learning_rate": 0.00013277209861695731, | |
| "loss": 0.04595586657524109, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.085814360770578, | |
| "grad_norm": 0.2865266799926758, | |
| "learning_rate": 0.00013156945279615154, | |
| "loss": 0.04963254630565643, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.1033274956217163, | |
| "grad_norm": 0.19880151748657227, | |
| "learning_rate": 0.00013036680697534576, | |
| "loss": 0.05288234353065491, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.1208406304728546, | |
| "grad_norm": 0.25318190455436707, | |
| "learning_rate": 0.00012916416115454, | |
| "loss": 0.04070430099964142, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.138353765323993, | |
| "grad_norm": 0.2229541689157486, | |
| "learning_rate": 0.0001279615153337342, | |
| "loss": 0.04462625682353973, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.1558669001751314, | |
| "grad_norm": 0.15195652842521667, | |
| "learning_rate": 0.00012675886951292846, | |
| "loss": 0.04568430483341217, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.1733800350262698, | |
| "grad_norm": 0.2872307300567627, | |
| "learning_rate": 0.0001255562236921227, | |
| "loss": 0.04056203365325928, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.1908931698774081, | |
| "grad_norm": 0.30495700240135193, | |
| "learning_rate": 0.00012435357787131689, | |
| "loss": 0.047316303849220274, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.2084063047285465, | |
| "grad_norm": 0.1586247980594635, | |
| "learning_rate": 0.00012315093205051114, | |
| "loss": 0.044099316000938416, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.2259194395796849, | |
| "grad_norm": 0.19665417075157166, | |
| "learning_rate": 0.00012194828622970536, | |
| "loss": 0.04525145888328552, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.2259194395796849, | |
| "eval_loss": 0.07472622394561768, | |
| "eval_runtime": 169.568, | |
| "eval_samples_per_second": 2.996, | |
| "eval_steps_per_second": 0.749, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.2434325744308232, | |
| "grad_norm": 0.21693575382232666, | |
| "learning_rate": 0.00012074564040889957, | |
| "loss": 0.04104744493961334, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.2609457092819616, | |
| "grad_norm": 0.24825339019298553, | |
| "learning_rate": 0.00011954299458809381, | |
| "loss": 0.0438425600528717, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.2784588441331, | |
| "grad_norm": 0.18047627806663513, | |
| "learning_rate": 0.00011834034876728803, | |
| "loss": 0.047738096117973326, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.295971978984238, | |
| "grad_norm": 0.19772164523601532, | |
| "learning_rate": 0.00011713770294648227, | |
| "loss": 0.04714350998401642, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.3134851138353765, | |
| "grad_norm": 0.22316114604473114, | |
| "learning_rate": 0.0001159350571256765, | |
| "loss": 0.04388459920883179, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.3309982486865148, | |
| "grad_norm": 0.1677238643169403, | |
| "learning_rate": 0.00011473241130487071, | |
| "loss": 0.04296576082706451, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.3485113835376532, | |
| "grad_norm": 0.2544882595539093, | |
| "learning_rate": 0.00011352976548406496, | |
| "loss": 0.037767985463142396, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.3660245183887916, | |
| "grad_norm": 0.17373642325401306, | |
| "learning_rate": 0.00011232711966325917, | |
| "loss": 0.04673008918762207, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.38353765323993, | |
| "grad_norm": 0.23099961876869202, | |
| "learning_rate": 0.00011112447384245341, | |
| "loss": 0.04906592071056366, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.4010507880910683, | |
| "grad_norm": 0.2572455406188965, | |
| "learning_rate": 0.00010992182802164763, | |
| "loss": 0.04228177070617676, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.4010507880910683, | |
| "eval_loss": 0.07377293705940247, | |
| "eval_runtime": 169.6978, | |
| "eval_samples_per_second": 2.994, | |
| "eval_steps_per_second": 0.748, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.4185639229422067, | |
| "grad_norm": 0.1933060735464096, | |
| "learning_rate": 0.00010871918220084186, | |
| "loss": 0.039757218956947324, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.436077057793345, | |
| "grad_norm": 0.21861182153224945, | |
| "learning_rate": 0.0001075165363800361, | |
| "loss": 0.04450837075710297, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.4535901926444834, | |
| "grad_norm": 0.27015894651412964, | |
| "learning_rate": 0.0001063138905592303, | |
| "loss": 0.04501202404499054, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.4711033274956218, | |
| "grad_norm": 0.15882235765457153, | |
| "learning_rate": 0.00010511124473842453, | |
| "loss": 0.040595722198486325, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.4886164623467601, | |
| "grad_norm": 0.22079160809516907, | |
| "learning_rate": 0.00010390859891761877, | |
| "loss": 0.04613872766494751, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.5061295971978983, | |
| "grad_norm": 0.26043882966041565, | |
| "learning_rate": 0.00010270595309681299, | |
| "loss": 0.052975207567214966, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.5236427320490367, | |
| "grad_norm": 0.1896980255842209, | |
| "learning_rate": 0.00010150330727600723, | |
| "loss": 0.04145742654800415, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.541155866900175, | |
| "grad_norm": 0.17354312539100647, | |
| "learning_rate": 0.00010030066145520146, | |
| "loss": 0.04943464994430542, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.5586690017513134, | |
| "grad_norm": 0.14007078111171722, | |
| "learning_rate": 9.909801563439568e-05, | |
| "loss": 0.04217578768730164, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.5761821366024518, | |
| "grad_norm": 0.20131802558898926, | |
| "learning_rate": 9.78953698135899e-05, | |
| "loss": 0.041672542691230774, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.5761821366024518, | |
| "eval_loss": 0.07051914185285568, | |
| "eval_runtime": 169.9735, | |
| "eval_samples_per_second": 2.989, | |
| "eval_steps_per_second": 0.747, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.5936952714535901, | |
| "grad_norm": 0.22193501889705658, | |
| "learning_rate": 9.669272399278413e-05, | |
| "loss": 0.04524196684360504, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.6112084063047285, | |
| "grad_norm": 0.23595920205116272, | |
| "learning_rate": 9.549007817197835e-05, | |
| "loss": 0.04126276075839996, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.6287215411558669, | |
| "grad_norm": 0.2922545373439789, | |
| "learning_rate": 9.428743235117259e-05, | |
| "loss": 0.04022812843322754, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.6462346760070052, | |
| "grad_norm": 0.23278813064098358, | |
| "learning_rate": 9.30847865303668e-05, | |
| "loss": 0.04213928878307342, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.6637478108581436, | |
| "grad_norm": 0.14974910020828247, | |
| "learning_rate": 9.188214070956104e-05, | |
| "loss": 0.0363939642906189, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.681260945709282, | |
| "grad_norm": 0.1183304563164711, | |
| "learning_rate": 9.067949488875526e-05, | |
| "loss": 0.04207303524017334, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.6987740805604203, | |
| "grad_norm": 0.23170360922813416, | |
| "learning_rate": 8.94768490679495e-05, | |
| "loss": 0.042323988676071164, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.7162872154115587, | |
| "grad_norm": 0.14556758105754852, | |
| "learning_rate": 8.827420324714371e-05, | |
| "loss": 0.042339283227920535, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.733800350262697, | |
| "grad_norm": 0.1421191394329071, | |
| "learning_rate": 8.707155742633795e-05, | |
| "loss": 0.04450683891773224, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.7513134851138354, | |
| "grad_norm": 0.31845614314079285, | |
| "learning_rate": 8.586891160553218e-05, | |
| "loss": 0.042928069829940796, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.7513134851138354, | |
| "eval_loss": 0.0688522532582283, | |
| "eval_runtime": 169.5678, | |
| "eval_samples_per_second": 2.996, | |
| "eval_steps_per_second": 0.749, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.7688266199649738, | |
| "grad_norm": 0.1398610770702362, | |
| "learning_rate": 8.46662657847264e-05, | |
| "loss": 0.042378559708595276, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.7863397548161122, | |
| "grad_norm": 0.18888983130455017, | |
| "learning_rate": 8.346361996392062e-05, | |
| "loss": 0.044092172384262086, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.8038528896672505, | |
| "grad_norm": 0.192138671875, | |
| "learning_rate": 8.226097414311485e-05, | |
| "loss": 0.03955377042293549, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.821366024518389, | |
| "grad_norm": 0.2001374512910843, | |
| "learning_rate": 8.105832832230909e-05, | |
| "loss": 0.04774285852909088, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.8388791593695273, | |
| "grad_norm": 0.24916240572929382, | |
| "learning_rate": 7.985568250150331e-05, | |
| "loss": 0.044192954897880554, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.8563922942206657, | |
| "grad_norm": 0.21104031801223755, | |
| "learning_rate": 7.865303668069754e-05, | |
| "loss": 0.0387516975402832, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.873905429071804, | |
| "grad_norm": 0.27948206663131714, | |
| "learning_rate": 7.745039085989176e-05, | |
| "loss": 0.042763397097587585, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.8914185639229422, | |
| "grad_norm": 0.21115849912166595, | |
| "learning_rate": 7.6247745039086e-05, | |
| "loss": 0.03943166434764862, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.9089316987740805, | |
| "grad_norm": 0.24164821207523346, | |
| "learning_rate": 7.504509921828022e-05, | |
| "loss": 0.04395500421524048, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.926444833625219, | |
| "grad_norm": 0.14232757687568665, | |
| "learning_rate": 7.384245339747445e-05, | |
| "loss": 0.03802197575569153, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.926444833625219, | |
| "eval_loss": 0.0663708746433258, | |
| "eval_runtime": 170.0427, | |
| "eval_samples_per_second": 2.987, | |
| "eval_steps_per_second": 0.747, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.9439579684763573, | |
| "grad_norm": 0.20456406474113464, | |
| "learning_rate": 7.263980757666867e-05, | |
| "loss": 0.04351660311222076, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.9614711033274956, | |
| "grad_norm": 0.28461146354675293, | |
| "learning_rate": 7.14371617558629e-05, | |
| "loss": 0.04411421418190002, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.978984238178634, | |
| "grad_norm": 0.33428093791007996, | |
| "learning_rate": 7.023451593505713e-05, | |
| "loss": 0.04533115029335022, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.9964973730297724, | |
| "grad_norm": 0.2965065538883209, | |
| "learning_rate": 6.903187011425134e-05, | |
| "loss": 0.04683744609355926, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.0140105078809105, | |
| "grad_norm": 0.13189074397087097, | |
| "learning_rate": 6.782922429344558e-05, | |
| "loss": 0.024469637870788576, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.031523642732049, | |
| "grad_norm": 0.26192790269851685, | |
| "learning_rate": 6.662657847263981e-05, | |
| "loss": 0.020343032479286195, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.0490367775831873, | |
| "grad_norm": 0.17017051577568054, | |
| "learning_rate": 6.542393265183405e-05, | |
| "loss": 0.023167347908020018, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.0665499124343256, | |
| "grad_norm": 0.23270311951637268, | |
| "learning_rate": 6.422128683102826e-05, | |
| "loss": 0.019265547394752502, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.084063047285464, | |
| "grad_norm": 0.17566721141338348, | |
| "learning_rate": 6.30186410102225e-05, | |
| "loss": 0.020077353715896605, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.1015761821366024, | |
| "grad_norm": 0.21460862457752228, | |
| "learning_rate": 6.181599518941672e-05, | |
| "loss": 0.020433691143989564, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.1015761821366024, | |
| "eval_loss": 0.0755230188369751, | |
| "eval_runtime": 169.6234, | |
| "eval_samples_per_second": 2.995, | |
| "eval_steps_per_second": 0.749, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.1190893169877407, | |
| "grad_norm": 0.19966909289360046, | |
| "learning_rate": 6.061334936861095e-05, | |
| "loss": 0.019319312274456026, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.136602451838879, | |
| "grad_norm": 0.19373339414596558, | |
| "learning_rate": 5.941070354780517e-05, | |
| "loss": 0.022010722756385805, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.1541155866900175, | |
| "grad_norm": 0.19323857128620148, | |
| "learning_rate": 5.82080577269994e-05, | |
| "loss": 0.021162202954292296, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.171628721541156, | |
| "grad_norm": 0.16135787963867188, | |
| "learning_rate": 5.700541190619363e-05, | |
| "loss": 0.02209024876356125, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.189141856392294, | |
| "grad_norm": 0.1409604251384735, | |
| "learning_rate": 5.580276608538786e-05, | |
| "loss": 0.020828820765018463, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.2066549912434326, | |
| "grad_norm": 0.15199248492717743, | |
| "learning_rate": 5.460012026458209e-05, | |
| "loss": 0.019746646285057068, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.224168126094571, | |
| "grad_norm": 0.1164596751332283, | |
| "learning_rate": 5.339747444377631e-05, | |
| "loss": 0.02107318639755249, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.2416812609457093, | |
| "grad_norm": 0.14257144927978516, | |
| "learning_rate": 5.219482862297054e-05, | |
| "loss": 0.018259820342063905, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.2591943957968477, | |
| "grad_norm": 0.1540592759847641, | |
| "learning_rate": 5.0992182802164765e-05, | |
| "loss": 0.0190964937210083, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.276707530647986, | |
| "grad_norm": 0.2179027795791626, | |
| "learning_rate": 4.978953698135899e-05, | |
| "loss": 0.020862923562526704, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.276707530647986, | |
| "eval_loss": 0.0765165463089943, | |
| "eval_runtime": 170.3828, | |
| "eval_samples_per_second": 2.982, | |
| "eval_steps_per_second": 0.745, | |
| "step": 1300 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1713, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.0067417630582374e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |