| { | |
| "best_metric": 0.022735346358792183, | |
| "best_model_checkpoint": "./results/checkpoint-10634", | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 10634, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.7369279265403748, | |
| "learning_rate": 4.952981004325748e-05, | |
| "loss": 4.7032, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.5547892451286316, | |
| "learning_rate": 4.905962008651496e-05, | |
| "loss": 4.7033, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.5557618737220764, | |
| "learning_rate": 4.858943012977243e-05, | |
| "loss": 4.7015, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.5398121476173401, | |
| "learning_rate": 4.8119240173029906e-05, | |
| "loss": 4.7038, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 0.5128409266471863, | |
| "learning_rate": 4.7649050216287384e-05, | |
| "loss": 4.7003, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 0.57951420545578, | |
| "learning_rate": 4.717886025954486e-05, | |
| "loss": 4.7018, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 0.48418277502059937, | |
| "learning_rate": 4.670867030280233e-05, | |
| "loss": 4.7005, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 0.5264779329299927, | |
| "learning_rate": 4.623848034605981e-05, | |
| "loss": 4.7034, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 0.4631044268608093, | |
| "learning_rate": 4.576829038931729e-05, | |
| "loss": 4.7018, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 0.49229735136032104, | |
| "learning_rate": 4.5298100432574765e-05, | |
| "loss": 4.6994, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.5025375485420227, | |
| "learning_rate": 4.482791047583224e-05, | |
| "loss": 4.7019, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 0.5106560587882996, | |
| "learning_rate": 4.4357720519089714e-05, | |
| "loss": 4.7017, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.4811244308948517, | |
| "learning_rate": 4.388753056234719e-05, | |
| "loss": 4.7015, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 0.5047605037689209, | |
| "learning_rate": 4.341734060560467e-05, | |
| "loss": 4.7011, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 0.4720272123813629, | |
| "learning_rate": 4.2947150648862146e-05, | |
| "loss": 4.7004, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.3932195007801056, | |
| "learning_rate": 4.247696069211962e-05, | |
| "loss": 4.7028, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.39717328548431396, | |
| "learning_rate": 4.2006770735377095e-05, | |
| "loss": 4.702, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 0.46395114064216614, | |
| "learning_rate": 4.153658077863457e-05, | |
| "loss": 4.7005, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.4270722270011902, | |
| "learning_rate": 4.106639082189205e-05, | |
| "loss": 4.7, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 0.4068189561367035, | |
| "learning_rate": 4.059620086514952e-05, | |
| "loss": 4.7016, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 0.4495134651660919, | |
| "learning_rate": 4.0126010908407e-05, | |
| "loss": 4.7, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 0.4774065613746643, | |
| "learning_rate": 3.9655820951664476e-05, | |
| "loss": 4.7022, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 0.4217697083950043, | |
| "learning_rate": 3.9185630994921954e-05, | |
| "loss": 4.7016, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 0.4176025092601776, | |
| "learning_rate": 3.871544103817943e-05, | |
| "loss": 4.7005, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 0.43538960814476013, | |
| "learning_rate": 3.82452510814369e-05, | |
| "loss": 4.7002, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 0.3962569236755371, | |
| "learning_rate": 3.777506112469438e-05, | |
| "loss": 4.7015, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 0.41195255517959595, | |
| "learning_rate": 3.730487116795186e-05, | |
| "loss": 4.7014, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 0.3937325179576874, | |
| "learning_rate": 3.6834681211209335e-05, | |
| "loss": 4.7015, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 0.39071130752563477, | |
| "learning_rate": 3.6364491254466806e-05, | |
| "loss": 4.6994, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.40909039974212646, | |
| "learning_rate": 3.5894301297724284e-05, | |
| "loss": 4.7019, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 0.3900412321090698, | |
| "learning_rate": 3.542411134098176e-05, | |
| "loss": 4.6996, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.5122581720352173, | |
| "learning_rate": 3.495392138423923e-05, | |
| "loss": 4.6992, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 0.38495704531669617, | |
| "learning_rate": 3.448373142749671e-05, | |
| "loss": 4.7024, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.40592727065086365, | |
| "learning_rate": 3.401354147075419e-05, | |
| "loss": 4.7002, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 0.4175421893596649, | |
| "learning_rate": 3.354335151401166e-05, | |
| "loss": 4.6993, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 0.43954411149024963, | |
| "learning_rate": 3.3073161557269136e-05, | |
| "loss": 4.7016, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 0.501674473285675, | |
| "learning_rate": 3.2602971600526614e-05, | |
| "loss": 4.6995, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 0.3901698589324951, | |
| "learning_rate": 3.2132781643784084e-05, | |
| "loss": 4.7006, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 0.3993096649646759, | |
| "learning_rate": 3.166259168704156e-05, | |
| "loss": 4.699, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 0.43237388134002686, | |
| "learning_rate": 3.119240173029904e-05, | |
| "loss": 4.7001, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 2.5795607566833496, | |
| "learning_rate": 3.072221177355652e-05, | |
| "loss": 4.6928, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 0.9452407956123352, | |
| "learning_rate": 3.0252021816814e-05, | |
| "loss": 4.7014, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 0.7060949802398682, | |
| "learning_rate": 2.978183186007147e-05, | |
| "loss": 4.707, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.052825689315796, | |
| "learning_rate": 2.9311641903328947e-05, | |
| "loss": 4.7014, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 0.7108451128005981, | |
| "learning_rate": 2.8841451946586424e-05, | |
| "loss": 4.7027, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 0.6421079635620117, | |
| "learning_rate": 2.83712619898439e-05, | |
| "loss": 4.7033, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.5927799940109253, | |
| "learning_rate": 2.7901072033101373e-05, | |
| "loss": 4.7044, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.5713520050048828, | |
| "learning_rate": 2.743088207635885e-05, | |
| "loss": 4.7022, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 0.5557407140731812, | |
| "learning_rate": 2.6960692119616325e-05, | |
| "loss": 4.7036, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 0.37561553716659546, | |
| "learning_rate": 2.6490502162873802e-05, | |
| "loss": 4.7011, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.404910683631897, | |
| "learning_rate": 2.602031220613128e-05, | |
| "loss": 4.7007, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 0.39087265729904175, | |
| "learning_rate": 2.5550122249388754e-05, | |
| "loss": 4.7021, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.3689398169517517, | |
| "learning_rate": 2.507993229264623e-05, | |
| "loss": 4.7004, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.008644168146832444, | |
| "eval_f1": 0.00014816254395838658, | |
| "eval_loss": 4.700786113739014, | |
| "eval_precision": 7.472164295071267e-05, | |
| "eval_recall": 0.008644168146832444, | |
| "eval_runtime": 96.2388, | |
| "eval_samples_per_second": 87.75, | |
| "eval_steps_per_second": 4.884, | |
| "step": 5317 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 0.3822825849056244, | |
| "learning_rate": 2.4609742335903706e-05, | |
| "loss": 4.7002, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 0.421373575925827, | |
| "learning_rate": 2.413955237916118e-05, | |
| "loss": 4.6991, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 0.4438491463661194, | |
| "learning_rate": 2.3669362422418658e-05, | |
| "loss": 4.7001, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 0.812016487121582, | |
| "learning_rate": 2.3199172465676132e-05, | |
| "loss": 4.6941, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 1.119163155555725, | |
| "learning_rate": 2.272898250893361e-05, | |
| "loss": 4.6807, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 1.1431705951690674, | |
| "learning_rate": 2.2258792552191084e-05, | |
| "loss": 4.6842, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 1.4402434825897217, | |
| "learning_rate": 2.1788602595448562e-05, | |
| "loss": 4.6653, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 2.073824644088745, | |
| "learning_rate": 2.131841263870604e-05, | |
| "loss": 4.6543, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 1.6333357095718384, | |
| "learning_rate": 2.0848222681963514e-05, | |
| "loss": 4.6394, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 1.206061840057373, | |
| "learning_rate": 2.037803272522099e-05, | |
| "loss": 4.6213, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.5230748653411865, | |
| "learning_rate": 1.9907842768478466e-05, | |
| "loss": 4.7044, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 0.4634922444820404, | |
| "learning_rate": 1.9437652811735943e-05, | |
| "loss": 4.7031, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 0.47288939356803894, | |
| "learning_rate": 1.8967462854993417e-05, | |
| "loss": 4.7027, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 0.4731047749519348, | |
| "learning_rate": 1.8497272898250895e-05, | |
| "loss": 4.7014, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 0.5110020637512207, | |
| "learning_rate": 1.802708294150837e-05, | |
| "loss": 4.6995, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 0.5348946452140808, | |
| "learning_rate": 1.7556892984765847e-05, | |
| "loss": 4.7013, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 0.5182259678840637, | |
| "learning_rate": 1.708670302802332e-05, | |
| "loss": 4.6991, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 0.6097771525382996, | |
| "learning_rate": 1.66165130712808e-05, | |
| "loss": 4.7022, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 0.49007490277290344, | |
| "learning_rate": 1.6146323114538276e-05, | |
| "loss": 4.7012, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 0.5180237293243408, | |
| "learning_rate": 1.567613315779575e-05, | |
| "loss": 4.6998, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 0.4303135275840759, | |
| "learning_rate": 1.5205943201053227e-05, | |
| "loss": 4.7025, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 0.6103301644325256, | |
| "learning_rate": 1.47357532443107e-05, | |
| "loss": 4.7027, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 0.5991978049278259, | |
| "learning_rate": 1.4265563287568178e-05, | |
| "loss": 4.7016, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 0.590713381767273, | |
| "learning_rate": 1.3795373330825653e-05, | |
| "loss": 4.6992, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 0.6496405601501465, | |
| "learning_rate": 1.332518337408313e-05, | |
| "loss": 4.7005, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 0.5405462980270386, | |
| "learning_rate": 1.2854993417340605e-05, | |
| "loss": 4.704, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.5655048489570618, | |
| "learning_rate": 1.2384803460598082e-05, | |
| "loss": 4.701, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 1.1468925476074219, | |
| "learning_rate": 1.1914613503855558e-05, | |
| "loss": 4.6547, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 3.2609410285949707, | |
| "learning_rate": 1.1444423547113034e-05, | |
| "loss": 4.6054, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 3.0458478927612305, | |
| "learning_rate": 1.097423359037051e-05, | |
| "loss": 4.5575, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 4.3110527992248535, | |
| "learning_rate": 1.0504043633627986e-05, | |
| "loss": 4.5948, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 1.7174781560897827, | |
| "learning_rate": 1.0033853676885462e-05, | |
| "loss": 4.5585, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 3.2902700901031494, | |
| "learning_rate": 9.56366372014294e-06, | |
| "loss": 4.5886, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 1.1743800640106201, | |
| "learning_rate": 9.093473763400415e-06, | |
| "loss": 4.5534, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 1.5308388471603394, | |
| "learning_rate": 8.623283806657891e-06, | |
| "loss": 4.547, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 2.839127540588379, | |
| "learning_rate": 8.153093849915366e-06, | |
| "loss": 4.5339, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 1.9120197296142578, | |
| "learning_rate": 7.682903893172842e-06, | |
| "loss": 4.5439, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 1.549185872077942, | |
| "learning_rate": 7.212713936430318e-06, | |
| "loss": 4.5276, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 2.3087658882141113, | |
| "learning_rate": 6.742523979687794e-06, | |
| "loss": 4.5081, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 1.4830306768417358, | |
| "learning_rate": 6.27233402294527e-06, | |
| "loss": 4.494, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 2.1838178634643555, | |
| "learning_rate": 5.802144066202746e-06, | |
| "loss": 4.4904, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 3.3637304306030273, | |
| "learning_rate": 5.331954109460222e-06, | |
| "loss": 4.4807, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 1.637593388557434, | |
| "learning_rate": 4.861764152717698e-06, | |
| "loss": 4.474, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 1.833465576171875, | |
| "learning_rate": 4.391574195975174e-06, | |
| "loss": 4.4719, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 2.6578447818756104, | |
| "learning_rate": 3.921384239232651e-06, | |
| "loss": 4.4593, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 2.121244192123413, | |
| "learning_rate": 3.451194282490126e-06, | |
| "loss": 4.4514, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 2.080278158187866, | |
| "learning_rate": 2.981004325747602e-06, | |
| "loss": 4.4407, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 2.2840731143951416, | |
| "learning_rate": 2.510814369005078e-06, | |
| "loss": 4.4289, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 2.203077554702759, | |
| "learning_rate": 2.0406244122625544e-06, | |
| "loss": 4.4071, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 2.4338624477386475, | |
| "learning_rate": 1.5704344555200301e-06, | |
| "loss": 4.4166, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 1.855125069618225, | |
| "learning_rate": 1.1002444987775063e-06, | |
| "loss": 4.4079, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 2.9419732093811035, | |
| "learning_rate": 6.300545420349821e-07, | |
| "loss": 4.4227, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 3.1695730686187744, | |
| "learning_rate": 1.5986458529245815e-07, | |
| "loss": 4.4244, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.022735346358792183, | |
| "eval_f1": 0.00212270922930165, | |
| "eval_loss": 4.410290241241455, | |
| "eval_precision": 0.0011595953132254675, | |
| "eval_recall": 0.022735346358792183, | |
| "eval_runtime": 96.0024, | |
| "eval_samples_per_second": 87.967, | |
| "eval_steps_per_second": 4.896, | |
| "step": 10634 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 10634, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "total_flos": 5.041092263921664e+16, | |
| "train_batch_size": 18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |