| { | |
| "best_metric": 0.28622403740882874, | |
| "best_model_checkpoint": "embed/MP_modbert_embed_voc1_0_100_MP/checkpoint-15728", | |
| "epoch": 100.0, | |
| "eval_steps": 500, | |
| "global_step": 98300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.508646998982706, | |
| "grad_norm": 0.3106672167778015, | |
| "learning_rate": 1.5259409969481182e-06, | |
| "loss": 1.541, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 1.2760541439056396, | |
| "eval_mae": 0.8460478782653809, | |
| "eval_r2": -7.616851806640625, | |
| "eval_rmse": 1.1296252012252808, | |
| "eval_runtime": 20.7046, | |
| "eval_samples_per_second": 482.984, | |
| "eval_steps_per_second": 3.816, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 1.017293997965412, | |
| "grad_norm": 0.6856330037117004, | |
| "learning_rate": 3.0518819938962364e-06, | |
| "loss": 1.3676, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.5259409969481181, | |
| "grad_norm": 1.0226482152938843, | |
| "learning_rate": 4.577822990844354e-06, | |
| "loss": 1.09, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.7750646471977234, | |
| "eval_mae": 0.6696741580963135, | |
| "eval_r2": -0.1967395395040512, | |
| "eval_rmse": 0.8803772330284119, | |
| "eval_runtime": 20.7332, | |
| "eval_samples_per_second": 482.319, | |
| "eval_steps_per_second": 3.81, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 2.034587995930824, | |
| "grad_norm": 1.0323240756988525, | |
| "learning_rate": 6.103763987792473e-06, | |
| "loss": 0.8467, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.5432349949135302, | |
| "grad_norm": 1.000579833984375, | |
| "learning_rate": 7.62970498474059e-06, | |
| "loss": 0.6796, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.5403278470039368, | |
| "eval_mae": 0.5509415864944458, | |
| "eval_r2": 0.3976656198501587, | |
| "eval_rmse": 0.7350696325302124, | |
| "eval_runtime": 19.1568, | |
| "eval_samples_per_second": 522.008, | |
| "eval_steps_per_second": 4.124, | |
| "step": 2949 | |
| }, | |
| { | |
| "epoch": 3.051881993896236, | |
| "grad_norm": 1.153286099433899, | |
| "learning_rate": 9.155645981688708e-06, | |
| "loss": 0.5626, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.560528992878942, | |
| "grad_norm": 1.2380833625793457, | |
| "learning_rate": 1.0681586978636825e-05, | |
| "loss": 0.4871, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.4411344826221466, | |
| "eval_mae": 0.4877315163612366, | |
| "eval_r2": 0.5509063005447388, | |
| "eval_rmse": 0.6641793847084045, | |
| "eval_runtime": 17.1276, | |
| "eval_samples_per_second": 583.854, | |
| "eval_steps_per_second": 4.612, | |
| "step": 3932 | |
| }, | |
| { | |
| "epoch": 4.069175991861648, | |
| "grad_norm": 1.0829737186431885, | |
| "learning_rate": 1.2207527975584946e-05, | |
| "loss": 0.4344, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.577822990844354, | |
| "grad_norm": 0.9966434240341187, | |
| "learning_rate": 1.3733468972533063e-05, | |
| "loss": 0.3966, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.391430139541626, | |
| "eval_mae": 0.44986167550086975, | |
| "eval_r2": 0.6214621067047119, | |
| "eval_rmse": 0.6256435513496399, | |
| "eval_runtime": 19.311, | |
| "eval_samples_per_second": 517.839, | |
| "eval_steps_per_second": 4.091, | |
| "step": 4915 | |
| }, | |
| { | |
| "epoch": 5.0864699898270604, | |
| "grad_norm": 1.2099027633666992, | |
| "learning_rate": 1.525940996948118e-05, | |
| "loss": 0.3747, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 5.595116988809766, | |
| "grad_norm": 1.3580559492111206, | |
| "learning_rate": 1.67853509664293e-05, | |
| "loss": 0.3501, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.35625067353248596, | |
| "eval_mae": 0.42443710565567017, | |
| "eval_r2": 0.6606752872467041, | |
| "eval_rmse": 0.5968672037124634, | |
| "eval_runtime": 19.6114, | |
| "eval_samples_per_second": 509.909, | |
| "eval_steps_per_second": 4.028, | |
| "step": 5898 | |
| }, | |
| { | |
| "epoch": 6.103763987792472, | |
| "grad_norm": 1.0308208465576172, | |
| "learning_rate": 1.8311291963377416e-05, | |
| "loss": 0.3332, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 6.612410986775178, | |
| "grad_norm": 1.0148053169250488, | |
| "learning_rate": 1.9837232960325533e-05, | |
| "loss": 0.3184, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 0.3325399160385132, | |
| "eval_mae": 0.4055442214012146, | |
| "eval_r2": 0.714428186416626, | |
| "eval_rmse": 0.576662540435791, | |
| "eval_runtime": 19.3745, | |
| "eval_samples_per_second": 516.143, | |
| "eval_steps_per_second": 4.078, | |
| "step": 6881 | |
| }, | |
| { | |
| "epoch": 7.121057985757884, | |
| "grad_norm": 1.2479807138442993, | |
| "learning_rate": 2.136317395727365e-05, | |
| "loss": 0.3061, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 7.62970498474059, | |
| "grad_norm": 0.9779444932937622, | |
| "learning_rate": 2.288911495422177e-05, | |
| "loss": 0.2946, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.3256845772266388, | |
| "eval_mae": 0.39594584703445435, | |
| "eval_r2": 0.7166282534599304, | |
| "eval_rmse": 0.5706875920295715, | |
| "eval_runtime": 20.9579, | |
| "eval_samples_per_second": 477.147, | |
| "eval_steps_per_second": 3.769, | |
| "step": 7864 | |
| }, | |
| { | |
| "epoch": 8.138351983723297, | |
| "grad_norm": 0.9078311324119568, | |
| "learning_rate": 2.441505595116989e-05, | |
| "loss": 0.2871, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 8.646998982706002, | |
| "grad_norm": 0.9875624775886536, | |
| "learning_rate": 2.594099694811801e-05, | |
| "loss": 0.276, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 0.31457769870758057, | |
| "eval_mae": 0.3851270079612732, | |
| "eval_r2": 0.7076315879821777, | |
| "eval_rmse": 0.5608720779418945, | |
| "eval_runtime": 18.1766, | |
| "eval_samples_per_second": 550.158, | |
| "eval_steps_per_second": 4.346, | |
| "step": 8847 | |
| }, | |
| { | |
| "epoch": 9.155645981688709, | |
| "grad_norm": 0.8023911714553833, | |
| "learning_rate": 2.7466937945066126e-05, | |
| "loss": 0.2669, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 9.664292980671414, | |
| "grad_norm": 0.846149206161499, | |
| "learning_rate": 2.8992878942014243e-05, | |
| "loss": 0.26, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.3037531077861786, | |
| "eval_mae": 0.37509840726852417, | |
| "eval_r2": 0.7389528155326843, | |
| "eval_rmse": 0.5511377453804016, | |
| "eval_runtime": 19.169, | |
| "eval_samples_per_second": 521.676, | |
| "eval_steps_per_second": 4.121, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 10.172939979654121, | |
| "grad_norm": 1.080586552619934, | |
| "learning_rate": 2.9942353340115293e-05, | |
| "loss": 0.2499, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 10.681586978636826, | |
| "grad_norm": 0.7065219879150391, | |
| "learning_rate": 2.9772804340454395e-05, | |
| "loss": 0.2452, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 0.2963975965976715, | |
| "eval_mae": 0.36469894647598267, | |
| "eval_r2": 0.7409036159515381, | |
| "eval_rmse": 0.5444238781929016, | |
| "eval_runtime": 19.1182, | |
| "eval_samples_per_second": 523.063, | |
| "eval_steps_per_second": 4.132, | |
| "step": 10813 | |
| }, | |
| { | |
| "epoch": 11.190233977619531, | |
| "grad_norm": 0.7577599287033081, | |
| "learning_rate": 2.960325534079349e-05, | |
| "loss": 0.2346, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 11.698880976602238, | |
| "grad_norm": 0.9281987547874451, | |
| "learning_rate": 2.9433706341132588e-05, | |
| "loss": 0.2283, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 0.2902641296386719, | |
| "eval_mae": 0.3583831191062927, | |
| "eval_r2": 0.7628405690193176, | |
| "eval_rmse": 0.5387614965438843, | |
| "eval_runtime": 20.6847, | |
| "eval_samples_per_second": 483.45, | |
| "eval_steps_per_second": 3.819, | |
| "step": 11796 | |
| }, | |
| { | |
| "epoch": 12.207527975584943, | |
| "grad_norm": 0.7825191617012024, | |
| "learning_rate": 2.9264157341471686e-05, | |
| "loss": 0.2218, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 12.71617497456765, | |
| "grad_norm": 0.7279273867607117, | |
| "learning_rate": 2.9094608341810784e-05, | |
| "loss": 0.2145, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 0.2926931083202362, | |
| "eval_mae": 0.35721156001091003, | |
| "eval_r2": 0.7634265422821045, | |
| "eval_rmse": 0.5410109758377075, | |
| "eval_runtime": 19.8102, | |
| "eval_samples_per_second": 504.789, | |
| "eval_steps_per_second": 3.988, | |
| "step": 12779 | |
| }, | |
| { | |
| "epoch": 13.224821973550355, | |
| "grad_norm": 0.7883431911468506, | |
| "learning_rate": 2.8925059342149882e-05, | |
| "loss": 0.2086, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 13.733468972533062, | |
| "grad_norm": 0.7794452905654907, | |
| "learning_rate": 2.875551034248898e-05, | |
| "loss": 0.2023, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 0.2874827980995178, | |
| "eval_mae": 0.3508231043815613, | |
| "eval_r2": 0.7408859133720398, | |
| "eval_rmse": 0.5361739993095398, | |
| "eval_runtime": 19.5699, | |
| "eval_samples_per_second": 510.99, | |
| "eval_steps_per_second": 4.037, | |
| "step": 13762 | |
| }, | |
| { | |
| "epoch": 14.242115971515767, | |
| "grad_norm": 0.6752628684043884, | |
| "learning_rate": 2.8585961342828075e-05, | |
| "loss": 0.1954, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 14.750762970498474, | |
| "grad_norm": 0.7037671804428101, | |
| "learning_rate": 2.8416412343167176e-05, | |
| "loss": 0.1927, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 0.2897137701511383, | |
| "eval_mae": 0.34881192445755005, | |
| "eval_r2": 0.7613890171051025, | |
| "eval_rmse": 0.5382503867149353, | |
| "eval_runtime": 20.9472, | |
| "eval_samples_per_second": 477.39, | |
| "eval_steps_per_second": 3.771, | |
| "step": 14745 | |
| }, | |
| { | |
| "epoch": 15.25940996948118, | |
| "grad_norm": 0.7775920033454895, | |
| "learning_rate": 2.8246863343506274e-05, | |
| "loss": 0.1852, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 15.768056968463886, | |
| "grad_norm": 0.6624333262443542, | |
| "learning_rate": 2.8077314343845372e-05, | |
| "loss": 0.1846, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 0.28622403740882874, | |
| "eval_mae": 0.34425806999206543, | |
| "eval_r2": 0.7593017816543579, | |
| "eval_rmse": 0.5349988341331482, | |
| "eval_runtime": 19.8037, | |
| "eval_samples_per_second": 504.956, | |
| "eval_steps_per_second": 3.989, | |
| "step": 15728 | |
| }, | |
| { | |
| "epoch": 16.276703967446593, | |
| "grad_norm": 0.6602976322174072, | |
| "learning_rate": 2.790776534418447e-05, | |
| "loss": 0.1776, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 16.7853509664293, | |
| "grad_norm": 0.6582776308059692, | |
| "learning_rate": 2.773821634452357e-05, | |
| "loss": 0.1762, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 0.28853750228881836, | |
| "eval_mae": 0.3421522378921509, | |
| "eval_r2": 0.7615672945976257, | |
| "eval_rmse": 0.5371565818786621, | |
| "eval_runtime": 20.8644, | |
| "eval_samples_per_second": 479.286, | |
| "eval_steps_per_second": 3.786, | |
| "step": 16711 | |
| }, | |
| { | |
| "epoch": 17.293997965412004, | |
| "grad_norm": 0.7847622632980347, | |
| "learning_rate": 2.7568667344862667e-05, | |
| "loss": 0.1677, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 17.80264496439471, | |
| "grad_norm": 0.7339199185371399, | |
| "learning_rate": 2.739911834520176e-05, | |
| "loss": 0.1694, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 0.29071176052093506, | |
| "eval_mae": 0.34257909655570984, | |
| "eval_r2": 0.7714438438415527, | |
| "eval_rmse": 0.5391767024993896, | |
| "eval_runtime": 19.1023, | |
| "eval_samples_per_second": 523.496, | |
| "eval_steps_per_second": 4.136, | |
| "step": 17694 | |
| }, | |
| { | |
| "epoch": 18.311291963377418, | |
| "grad_norm": 0.7355333566665649, | |
| "learning_rate": 2.7229569345540863e-05, | |
| "loss": 0.164, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 18.819938962360123, | |
| "grad_norm": 0.6125873923301697, | |
| "learning_rate": 2.7060020345879958e-05, | |
| "loss": 0.1621, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_loss": 0.2921219766139984, | |
| "eval_mae": 0.34166020154953003, | |
| "eval_r2": 0.771050214767456, | |
| "eval_rmse": 0.5404828786849976, | |
| "eval_runtime": 18.7584, | |
| "eval_samples_per_second": 533.096, | |
| "eval_steps_per_second": 4.211, | |
| "step": 18677 | |
| }, | |
| { | |
| "epoch": 19.328585961342828, | |
| "grad_norm": 0.533932626247406, | |
| "learning_rate": 2.689047134621906e-05, | |
| "loss": 0.1574, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 19.837232960325533, | |
| "grad_norm": 0.5610256791114807, | |
| "learning_rate": 2.6720922346558154e-05, | |
| "loss": 0.1585, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 0.2904634177684784, | |
| "eval_mae": 0.3388892710208893, | |
| "eval_r2": 0.759547233581543, | |
| "eval_rmse": 0.5389463305473328, | |
| "eval_runtime": 18.6352, | |
| "eval_samples_per_second": 536.618, | |
| "eval_steps_per_second": 4.239, | |
| "step": 19660 | |
| }, | |
| { | |
| "epoch": 20.345879959308242, | |
| "grad_norm": 0.639903724193573, | |
| "learning_rate": 2.6551373346897255e-05, | |
| "loss": 0.1518, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 20.854526958290947, | |
| "grad_norm": 0.6409148573875427, | |
| "learning_rate": 2.6381824347236353e-05, | |
| "loss": 0.1538, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_loss": 0.28870195150375366, | |
| "eval_mae": 0.3359060287475586, | |
| "eval_r2": 0.7662752866744995, | |
| "eval_rmse": 0.5373095870018005, | |
| "eval_runtime": 19.483, | |
| "eval_samples_per_second": 513.267, | |
| "eval_steps_per_second": 4.055, | |
| "step": 20643 | |
| }, | |
| { | |
| "epoch": 21.363173957273652, | |
| "grad_norm": 0.6030653715133667, | |
| "learning_rate": 2.6212275347575448e-05, | |
| "loss": 0.1454, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 21.871820956256357, | |
| "grad_norm": 0.5733378529548645, | |
| "learning_rate": 2.604272634791455e-05, | |
| "loss": 0.1512, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 0.2888459265232086, | |
| "eval_mae": 0.336772620677948, | |
| "eval_r2": 0.7591472268104553, | |
| "eval_rmse": 0.5374436974525452, | |
| "eval_runtime": 19.7086, | |
| "eval_samples_per_second": 507.392, | |
| "eval_steps_per_second": 4.008, | |
| "step": 21626 | |
| }, | |
| { | |
| "epoch": 22.380467955239062, | |
| "grad_norm": 0.5780584812164307, | |
| "learning_rate": 2.5873177348253644e-05, | |
| "loss": 0.1426, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 22.88911495422177, | |
| "grad_norm": 0.6120862364768982, | |
| "learning_rate": 2.5703628348592746e-05, | |
| "loss": 0.1462, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_loss": 0.28724002838134766, | |
| "eval_mae": 0.3324105143547058, | |
| "eval_r2": 0.7722494602203369, | |
| "eval_rmse": 0.5359475016593933, | |
| "eval_runtime": 19.302, | |
| "eval_samples_per_second": 518.08, | |
| "eval_steps_per_second": 4.093, | |
| "step": 22609 | |
| }, | |
| { | |
| "epoch": 23.397761953204476, | |
| "grad_norm": 0.5332029461860657, | |
| "learning_rate": 2.553407934893184e-05, | |
| "loss": 0.1386, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 23.90640895218718, | |
| "grad_norm": 0.5000107884407043, | |
| "learning_rate": 2.5364530349270942e-05, | |
| "loss": 0.1438, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 0.2948116064071655, | |
| "eval_mae": 0.33423200249671936, | |
| "eval_r2": 0.771849513053894, | |
| "eval_rmse": 0.542965292930603, | |
| "eval_runtime": 16.3008, | |
| "eval_samples_per_second": 613.466, | |
| "eval_steps_per_second": 4.846, | |
| "step": 23592 | |
| }, | |
| { | |
| "epoch": 24.415055951169887, | |
| "grad_norm": 0.4723941385746002, | |
| "learning_rate": 2.5194981349610037e-05, | |
| "loss": 0.1351, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 24.923702950152595, | |
| "grad_norm": 0.5572139024734497, | |
| "learning_rate": 2.5025432349949135e-05, | |
| "loss": 0.1408, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 0.28881922364234924, | |
| "eval_mae": 0.3297838866710663, | |
| "eval_r2": 0.7694551944732666, | |
| "eval_rmse": 0.5374186635017395, | |
| "eval_runtime": 16.1445, | |
| "eval_samples_per_second": 619.406, | |
| "eval_steps_per_second": 4.893, | |
| "step": 24575 | |
| }, | |
| { | |
| "epoch": 25.4323499491353, | |
| "grad_norm": 0.5389395952224731, | |
| "learning_rate": 2.4855883350288233e-05, | |
| "loss": 0.1332, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 25.940996948118006, | |
| "grad_norm": 0.7782149910926819, | |
| "learning_rate": 2.468633435062733e-05, | |
| "loss": 0.1374, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 0.2942332625389099, | |
| "eval_mae": 0.3317820429801941, | |
| "eval_r2": 0.7754645347595215, | |
| "eval_rmse": 0.5424323081970215, | |
| "eval_runtime": 15.3961, | |
| "eval_samples_per_second": 649.517, | |
| "eval_steps_per_second": 5.131, | |
| "step": 25558 | |
| }, | |
| { | |
| "epoch": 26.44964394710071, | |
| "grad_norm": 0.462462455034256, | |
| "learning_rate": 2.4516785350966432e-05, | |
| "loss": 0.1295, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 26.95829094608342, | |
| "grad_norm": 0.5143025517463684, | |
| "learning_rate": 2.4347236351305527e-05, | |
| "loss": 0.137, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_loss": 0.2935540974140167, | |
| "eval_mae": 0.330686092376709, | |
| "eval_r2": 0.767002284526825, | |
| "eval_rmse": 0.5418060421943665, | |
| "eval_runtime": 15.7155, | |
| "eval_samples_per_second": 636.314, | |
| "eval_steps_per_second": 5.027, | |
| "step": 26541 | |
| }, | |
| { | |
| "epoch": 27.466937945066125, | |
| "grad_norm": 0.4412664473056793, | |
| "learning_rate": 2.417768735164463e-05, | |
| "loss": 0.1266, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 27.97558494404883, | |
| "grad_norm": 0.5299028754234314, | |
| "learning_rate": 2.4008138351983723e-05, | |
| "loss": 0.1345, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_loss": 0.29448285698890686, | |
| "eval_mae": 0.33093565702438354, | |
| "eval_r2": 0.7710368633270264, | |
| "eval_rmse": 0.5426624417304993, | |
| "eval_runtime": 15.225, | |
| "eval_samples_per_second": 656.813, | |
| "eval_steps_per_second": 5.189, | |
| "step": 27524 | |
| }, | |
| { | |
| "epoch": 28.484231943031535, | |
| "grad_norm": 0.3932570219039917, | |
| "learning_rate": 2.383858935232282e-05, | |
| "loss": 0.126, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 28.992878942014244, | |
| "grad_norm": 0.5242863893508911, | |
| "learning_rate": 2.366904035266192e-05, | |
| "loss": 0.1295, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_loss": 0.29432228207588196, | |
| "eval_mae": 0.3288809657096863, | |
| "eval_r2": 0.7685636878013611, | |
| "eval_rmse": 0.542514443397522, | |
| "eval_runtime": 15.4546, | |
| "eval_samples_per_second": 647.058, | |
| "eval_steps_per_second": 5.112, | |
| "step": 28507 | |
| }, | |
| { | |
| "epoch": 29.50152594099695, | |
| "grad_norm": 0.4471481144428253, | |
| "learning_rate": 2.3499491353001018e-05, | |
| "loss": 0.1237, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_loss": 0.2978579103946686, | |
| "eval_mae": 0.3290804624557495, | |
| "eval_r2": 0.7655062675476074, | |
| "eval_rmse": 0.5457631945610046, | |
| "eval_runtime": 15.0561, | |
| "eval_samples_per_second": 664.183, | |
| "eval_steps_per_second": 5.247, | |
| "step": 29490 | |
| }, | |
| { | |
| "epoch": 30.010172939979654, | |
| "grad_norm": 0.5170900821685791, | |
| "learning_rate": 2.3329942353340116e-05, | |
| "loss": 0.1289, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 30.51881993896236, | |
| "grad_norm": 0.4907204806804657, | |
| "learning_rate": 2.3160393353679214e-05, | |
| "loss": 0.1207, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_loss": 0.294801265001297, | |
| "eval_mae": 0.32844942808151245, | |
| "eval_r2": 0.7698482275009155, | |
| "eval_rmse": 0.542955756187439, | |
| "eval_runtime": 15.6533, | |
| "eval_samples_per_second": 638.844, | |
| "eval_steps_per_second": 5.047, | |
| "step": 30473 | |
| }, | |
| { | |
| "epoch": 31.027466937945068, | |
| "grad_norm": 0.4819788932800293, | |
| "learning_rate": 2.2990844354018312e-05, | |
| "loss": 0.1285, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 31.536113936927773, | |
| "grad_norm": 0.48075565695762634, | |
| "learning_rate": 2.282129535435741e-05, | |
| "loss": 0.1194, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_loss": 0.2976591885089874, | |
| "eval_mae": 0.3284505605697632, | |
| "eval_r2": 0.770287275314331, | |
| "eval_rmse": 0.5455812811851501, | |
| "eval_runtime": 16.4511, | |
| "eval_samples_per_second": 607.864, | |
| "eval_steps_per_second": 4.802, | |
| "step": 31456 | |
| }, | |
| { | |
| "epoch": 32.044760935910475, | |
| "grad_norm": 0.4494258165359497, | |
| "learning_rate": 2.2651746354696508e-05, | |
| "loss": 0.1254, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 32.55340793489319, | |
| "grad_norm": 0.4671981632709503, | |
| "learning_rate": 2.2482197355035606e-05, | |
| "loss": 0.1193, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_loss": 0.29970449209213257, | |
| "eval_mae": 0.32844069600105286, | |
| "eval_r2": 0.7684862613677979, | |
| "eval_rmse": 0.5474523305892944, | |
| "eval_runtime": 16.4076, | |
| "eval_samples_per_second": 609.473, | |
| "eval_steps_per_second": 4.815, | |
| "step": 32439 | |
| }, | |
| { | |
| "epoch": 33.06205493387589, | |
| "grad_norm": 0.37166231870651245, | |
| "learning_rate": 2.2312648355374704e-05, | |
| "loss": 0.1237, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 33.5707019328586, | |
| "grad_norm": 0.34825435280799866, | |
| "learning_rate": 2.2143099355713802e-05, | |
| "loss": 0.1178, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_loss": 0.30024948716163635, | |
| "eval_mae": 0.3275962471961975, | |
| "eval_r2": 0.7728116512298584, | |
| "eval_rmse": 0.5479499697685242, | |
| "eval_runtime": 18.1242, | |
| "eval_samples_per_second": 551.747, | |
| "eval_steps_per_second": 4.359, | |
| "step": 33422 | |
| }, | |
| { | |
| "epoch": 34.0793489318413, | |
| "grad_norm": 0.41295087337493896, | |
| "learning_rate": 2.19735503560529e-05, | |
| "loss": 0.1216, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 34.58799593082401, | |
| "grad_norm": 0.4062606692314148, | |
| "learning_rate": 2.1804001356392e-05, | |
| "loss": 0.1172, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_loss": 0.2986757755279541, | |
| "eval_mae": 0.3264056444168091, | |
| "eval_r2": 0.7658240795135498, | |
| "eval_rmse": 0.5465120077133179, | |
| "eval_runtime": 18.6496, | |
| "eval_samples_per_second": 536.206, | |
| "eval_steps_per_second": 4.236, | |
| "step": 34405 | |
| }, | |
| { | |
| "epoch": 35.09664292980671, | |
| "grad_norm": 0.4476313889026642, | |
| "learning_rate": 2.1634452356731097e-05, | |
| "loss": 0.1198, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 35.60528992878942, | |
| "grad_norm": 0.29763707518577576, | |
| "learning_rate": 2.146490335707019e-05, | |
| "loss": 0.1165, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_loss": 0.30164480209350586, | |
| "eval_mae": 0.3274855315685272, | |
| "eval_r2": 0.7613018751144409, | |
| "eval_rmse": 0.5492217540740967, | |
| "eval_runtime": 18.3514, | |
| "eval_samples_per_second": 544.919, | |
| "eval_steps_per_second": 4.305, | |
| "step": 35388 | |
| }, | |
| { | |
| "epoch": 36.11393692777212, | |
| "grad_norm": 0.3980378806591034, | |
| "learning_rate": 2.1295354357409293e-05, | |
| "loss": 0.1178, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 36.622583926754835, | |
| "grad_norm": 0.48255112767219543, | |
| "learning_rate": 2.1125805357748388e-05, | |
| "loss": 0.1135, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_loss": 0.30013135075569153, | |
| "eval_mae": 0.3258868455886841, | |
| "eval_r2": 0.7733784914016724, | |
| "eval_rmse": 0.5478420853614807, | |
| "eval_runtime": 20.577, | |
| "eval_samples_per_second": 485.98, | |
| "eval_steps_per_second": 3.839, | |
| "step": 36371 | |
| }, | |
| { | |
| "epoch": 37.13123092573754, | |
| "grad_norm": 0.3691520392894745, | |
| "learning_rate": 2.095625635808749e-05, | |
| "loss": 0.1176, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 37.639877924720246, | |
| "grad_norm": 0.36071911454200745, | |
| "learning_rate": 2.0786707358426584e-05, | |
| "loss": 0.1146, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_loss": 0.30118992924690247, | |
| "eval_mae": 0.327684223651886, | |
| "eval_r2": 0.7721153497695923, | |
| "eval_rmse": 0.5488075017929077, | |
| "eval_runtime": 19.8739, | |
| "eval_samples_per_second": 503.173, | |
| "eval_steps_per_second": 3.975, | |
| "step": 37354 | |
| }, | |
| { | |
| "epoch": 38.14852492370295, | |
| "grad_norm": 0.3069157898426056, | |
| "learning_rate": 2.0617158358765685e-05, | |
| "loss": 0.1145, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 38.657171922685656, | |
| "grad_norm": 0.36026087403297424, | |
| "learning_rate": 2.0447609359104783e-05, | |
| "loss": 0.1139, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_loss": 0.2999809980392456, | |
| "eval_mae": 0.3254566788673401, | |
| "eval_r2": 0.7663213610649109, | |
| "eval_rmse": 0.5477048754692078, | |
| "eval_runtime": 18.5952, | |
| "eval_samples_per_second": 537.773, | |
| "eval_steps_per_second": 4.248, | |
| "step": 38337 | |
| }, | |
| { | |
| "epoch": 39.16581892166836, | |
| "grad_norm": 0.35690009593963623, | |
| "learning_rate": 2.0278060359443878e-05, | |
| "loss": 0.1149, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 39.674465920651066, | |
| "grad_norm": 0.34920957684516907, | |
| "learning_rate": 2.010851135978298e-05, | |
| "loss": 0.1136, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_loss": 0.3011326789855957, | |
| "eval_mae": 0.3251156210899353, | |
| "eval_r2": 0.7704006433486938, | |
| "eval_rmse": 0.5487552285194397, | |
| "eval_runtime": 17.6999, | |
| "eval_samples_per_second": 564.975, | |
| "eval_steps_per_second": 4.463, | |
| "step": 39320 | |
| }, | |
| { | |
| "epoch": 40.18311291963377, | |
| "grad_norm": 0.3468838036060333, | |
| "learning_rate": 1.9938962360122074e-05, | |
| "loss": 0.1121, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 40.691759918616484, | |
| "grad_norm": 0.4196014702320099, | |
| "learning_rate": 1.9769413360461176e-05, | |
| "loss": 0.1122, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_loss": 0.30130186676979065, | |
| "eval_mae": 0.324237585067749, | |
| "eval_r2": 0.7646907567977905, | |
| "eval_rmse": 0.5489093661308289, | |
| "eval_runtime": 17.2647, | |
| "eval_samples_per_second": 579.217, | |
| "eval_steps_per_second": 4.576, | |
| "step": 40303 | |
| }, | |
| { | |
| "epoch": 41.20040691759919, | |
| "grad_norm": 0.3917727470397949, | |
| "learning_rate": 1.959986436080027e-05, | |
| "loss": 0.1134, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 41.709053916581894, | |
| "grad_norm": 0.3163571059703827, | |
| "learning_rate": 1.9430315361139372e-05, | |
| "loss": 0.1093, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_loss": 0.30112671852111816, | |
| "eval_mae": 0.3250886797904968, | |
| "eval_r2": 0.7695274353027344, | |
| "eval_rmse": 0.5487497448921204, | |
| "eval_runtime": 17.9807, | |
| "eval_samples_per_second": 556.152, | |
| "eval_steps_per_second": 4.394, | |
| "step": 41286 | |
| }, | |
| { | |
| "epoch": 42.2177009155646, | |
| "grad_norm": 0.34252750873565674, | |
| "learning_rate": 1.9260766361478467e-05, | |
| "loss": 0.112, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 42.726347914547304, | |
| "grad_norm": 0.39978763461112976, | |
| "learning_rate": 1.9091217361817565e-05, | |
| "loss": 0.1105, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_loss": 0.30118075013160706, | |
| "eval_mae": 0.32392618060112, | |
| "eval_r2": 0.7685805559158325, | |
| "eval_rmse": 0.5487990379333496, | |
| "eval_runtime": 19.3392, | |
| "eval_samples_per_second": 517.085, | |
| "eval_steps_per_second": 4.085, | |
| "step": 42269 | |
| }, | |
| { | |
| "epoch": 43.23499491353001, | |
| "grad_norm": 0.38831090927124023, | |
| "learning_rate": 1.8921668362156663e-05, | |
| "loss": 0.1105, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 43.743641912512714, | |
| "grad_norm": 0.3529709577560425, | |
| "learning_rate": 1.875211936249576e-05, | |
| "loss": 0.111, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_loss": 0.3047633171081543, | |
| "eval_mae": 0.3260849118232727, | |
| "eval_r2": 0.7712223529815674, | |
| "eval_rmse": 0.5520533323287964, | |
| "eval_runtime": 17.5503, | |
| "eval_samples_per_second": 569.792, | |
| "eval_steps_per_second": 4.501, | |
| "step": 43252 | |
| }, | |
| { | |
| "epoch": 44.25228891149542, | |
| "grad_norm": 0.33766278624534607, | |
| "learning_rate": 1.8582570362834862e-05, | |
| "loss": 0.1084, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 44.760935910478125, | |
| "grad_norm": 0.4114132225513458, | |
| "learning_rate": 1.8413021363173957e-05, | |
| "loss": 0.1108, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_loss": 0.3017372488975525, | |
| "eval_mae": 0.32358628511428833, | |
| "eval_r2": 0.7659143209457397, | |
| "eval_rmse": 0.5493058562278748, | |
| "eval_runtime": 19.7823, | |
| "eval_samples_per_second": 505.501, | |
| "eval_steps_per_second": 3.993, | |
| "step": 44235 | |
| }, | |
| { | |
| "epoch": 45.26958290946084, | |
| "grad_norm": 0.37686386704444885, | |
| "learning_rate": 1.824347236351306e-05, | |
| "loss": 0.1068, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 45.77822990844354, | |
| "grad_norm": 0.32547298073768616, | |
| "learning_rate": 1.8073923363852153e-05, | |
| "loss": 0.1091, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_loss": 0.30219876766204834, | |
| "eval_mae": 0.3234938383102417, | |
| "eval_r2": 0.7667394280433655, | |
| "eval_rmse": 0.5497257709503174, | |
| "eval_runtime": 21.0895, | |
| "eval_samples_per_second": 474.169, | |
| "eval_steps_per_second": 3.746, | |
| "step": 45218 | |
| }, | |
| { | |
| "epoch": 46.28687690742625, | |
| "grad_norm": 0.3565451502799988, | |
| "learning_rate": 1.790437436419125e-05, | |
| "loss": 0.1074, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 46.79552390640895, | |
| "grad_norm": 0.3059617877006531, | |
| "learning_rate": 1.773482536453035e-05, | |
| "loss": 0.1085, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_loss": 0.300829142332077, | |
| "eval_mae": 0.32321953773498535, | |
| "eval_r2": 0.7664545178413391, | |
| "eval_rmse": 0.5484786033630371, | |
| "eval_runtime": 26.1836, | |
| "eval_samples_per_second": 381.918, | |
| "eval_steps_per_second": 3.017, | |
| "step": 46201 | |
| }, | |
| { | |
| "epoch": 47.30417090539166, | |
| "grad_norm": 0.37008407711982727, | |
| "learning_rate": 1.7565276364869448e-05, | |
| "loss": 0.1069, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 47.81281790437436, | |
| "grad_norm": 0.36229032278060913, | |
| "learning_rate": 1.7395727365208546e-05, | |
| "loss": 0.1082, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_loss": 0.30168837308883667, | |
| "eval_mae": 0.32336822152137756, | |
| "eval_r2": 0.7664652466773987, | |
| "eval_rmse": 0.5492613315582275, | |
| "eval_runtime": 23.5481, | |
| "eval_samples_per_second": 424.663, | |
| "eval_steps_per_second": 3.355, | |
| "step": 47184 | |
| }, | |
| { | |
| "epoch": 48.32146490335707, | |
| "grad_norm": 0.2914319932460785, | |
| "learning_rate": 1.7226178365547644e-05, | |
| "loss": 0.1041, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 48.83011190233977, | |
| "grad_norm": 0.3244343101978302, | |
| "learning_rate": 1.705662936588674e-05, | |
| "loss": 0.1084, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_loss": 0.3029985725879669, | |
| "eval_mae": 0.32335370779037476, | |
| "eval_r2": 0.76764976978302, | |
| "eval_rmse": 0.550452709197998, | |
| "eval_runtime": 21.4309, | |
| "eval_samples_per_second": 466.616, | |
| "eval_steps_per_second": 3.686, | |
| "step": 48167 | |
| }, | |
| { | |
| "epoch": 49.338758901322485, | |
| "grad_norm": 0.3769769072532654, | |
| "learning_rate": 1.688708036622584e-05, | |
| "loss": 0.1051, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 49.84740590030519, | |
| "grad_norm": 0.3235186040401459, | |
| "learning_rate": 1.6717531366564938e-05, | |
| "loss": 0.108, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_loss": 0.3056350648403168, | |
| "eval_mae": 0.3234601318836212, | |
| "eval_r2": 0.7742241621017456, | |
| "eval_rmse": 0.552842378616333, | |
| "eval_runtime": 21.2669, | |
| "eval_samples_per_second": 470.213, | |
| "eval_steps_per_second": 3.715, | |
| "step": 49150 | |
| }, | |
| { | |
| "epoch": 50.356052899287896, | |
| "grad_norm": 0.32111743092536926, | |
| "learning_rate": 1.6547982366904036e-05, | |
| "loss": 0.1023, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 50.8646998982706, | |
| "grad_norm": 0.35004922747612, | |
| "learning_rate": 1.6378433367243134e-05, | |
| "loss": 0.1065, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "eval_loss": 0.3032062351703644, | |
| "eval_mae": 0.3231772184371948, | |
| "eval_r2": 0.7666732668876648, | |
| "eval_rmse": 0.5506412982940674, | |
| "eval_runtime": 22.5263, | |
| "eval_samples_per_second": 443.925, | |
| "eval_steps_per_second": 3.507, | |
| "step": 50133 | |
| }, | |
| { | |
| "epoch": 51.373346897253306, | |
| "grad_norm": 0.3384553790092468, | |
| "learning_rate": 1.6208884367582232e-05, | |
| "loss": 0.105, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 51.88199389623601, | |
| "grad_norm": 0.30764466524124146, | |
| "learning_rate": 1.603933536792133e-05, | |
| "loss": 0.1061, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_loss": 0.3021974265575409, | |
| "eval_mae": 0.32215458154678345, | |
| "eval_r2": 0.7661508321762085, | |
| "eval_rmse": 0.5497245192527771, | |
| "eval_runtime": 21.0716, | |
| "eval_samples_per_second": 474.572, | |
| "eval_steps_per_second": 3.749, | |
| "step": 51116 | |
| }, | |
| { | |
| "epoch": 52.390640895218716, | |
| "grad_norm": 0.32773980498313904, | |
| "learning_rate": 1.5869786368260425e-05, | |
| "loss": 0.1022, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 52.89928789420142, | |
| "grad_norm": 0.3055395185947418, | |
| "learning_rate": 1.5700237368599527e-05, | |
| "loss": 0.1065, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "eval_loss": 0.3047163188457489, | |
| "eval_mae": 0.32322755455970764, | |
| "eval_r2": 0.7697006464004517, | |
| "eval_rmse": 0.552010715007782, | |
| "eval_runtime": 20.1406, | |
| "eval_samples_per_second": 496.509, | |
| "eval_steps_per_second": 3.922, | |
| "step": 52099 | |
| }, | |
| { | |
| "epoch": 53.407934893184134, | |
| "grad_norm": 0.25924962759017944, | |
| "learning_rate": 1.553068836893862e-05, | |
| "loss": 0.1022, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 53.91658189216684, | |
| "grad_norm": 0.29001641273498535, | |
| "learning_rate": 1.5361139369277723e-05, | |
| "loss": 0.1062, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_loss": 0.3048439025878906, | |
| "eval_mae": 0.32225465774536133, | |
| "eval_r2": 0.7680661678314209, | |
| "eval_rmse": 0.5521263480186462, | |
| "eval_runtime": 22.1784, | |
| "eval_samples_per_second": 450.889, | |
| "eval_steps_per_second": 3.562, | |
| "step": 53082 | |
| }, | |
| { | |
| "epoch": 54.425228891149544, | |
| "grad_norm": 0.2913689911365509, | |
| "learning_rate": 1.519159036961682e-05, | |
| "loss": 0.1012, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 54.93387589013225, | |
| "grad_norm": 0.27742624282836914, | |
| "learning_rate": 1.5022041369955919e-05, | |
| "loss": 0.1046, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "eval_loss": 0.30284586548805237, | |
| "eval_mae": 0.3212890028953552, | |
| "eval_r2": 0.765329122543335, | |
| "eval_rmse": 0.5503140091896057, | |
| "eval_runtime": 21.5879, | |
| "eval_samples_per_second": 463.223, | |
| "eval_steps_per_second": 3.659, | |
| "step": 54065 | |
| }, | |
| { | |
| "epoch": 55.442522889114954, | |
| "grad_norm": 0.24787943065166473, | |
| "learning_rate": 1.4852492370295015e-05, | |
| "loss": 0.1032, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 55.95116988809766, | |
| "grad_norm": 0.3004557192325592, | |
| "learning_rate": 1.4682943370634113e-05, | |
| "loss": 0.1034, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_loss": 0.30553364753723145, | |
| "eval_mae": 0.3226454555988312, | |
| "eval_r2": 0.7709420919418335, | |
| "eval_rmse": 0.5527505874633789, | |
| "eval_runtime": 20.8948, | |
| "eval_samples_per_second": 478.589, | |
| "eval_steps_per_second": 3.781, | |
| "step": 55048 | |
| }, | |
| { | |
| "epoch": 56.459816887080365, | |
| "grad_norm": 0.290238618850708, | |
| "learning_rate": 1.4513394370973212e-05, | |
| "loss": 0.1011, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 56.96846388606307, | |
| "grad_norm": 0.34532827138900757, | |
| "learning_rate": 1.434384537131231e-05, | |
| "loss": 0.1047, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "eval_loss": 0.3043980598449707, | |
| "eval_mae": 0.32213300466537476, | |
| "eval_r2": 0.764991283416748, | |
| "eval_rmse": 0.5517224073410034, | |
| "eval_runtime": 21.1266, | |
| "eval_samples_per_second": 473.337, | |
| "eval_steps_per_second": 3.739, | |
| "step": 56031 | |
| }, | |
| { | |
| "epoch": 57.477110885045775, | |
| "grad_norm": 0.2498069405555725, | |
| "learning_rate": 1.4174296371651406e-05, | |
| "loss": 0.0988, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 57.98575788402849, | |
| "grad_norm": 0.34196507930755615, | |
| "learning_rate": 1.4004747371990506e-05, | |
| "loss": 0.1048, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_loss": 0.3069715201854706, | |
| "eval_mae": 0.3227607011795044, | |
| "eval_r2": 0.7700543403625488, | |
| "eval_rmse": 0.5540497303009033, | |
| "eval_runtime": 22.6147, | |
| "eval_samples_per_second": 442.191, | |
| "eval_steps_per_second": 3.493, | |
| "step": 57014 | |
| }, | |
| { | |
| "epoch": 58.49440488301119, | |
| "grad_norm": 0.2622218132019043, | |
| "learning_rate": 1.3835198372329604e-05, | |
| "loss": 0.0996, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "eval_loss": 0.30414652824401855, | |
| "eval_mae": 0.3215589225292206, | |
| "eval_r2": 0.769301176071167, | |
| "eval_rmse": 0.5514944791793823, | |
| "eval_runtime": 21.6832, | |
| "eval_samples_per_second": 461.187, | |
| "eval_steps_per_second": 3.643, | |
| "step": 57997 | |
| }, | |
| { | |
| "epoch": 59.0030518819939, | |
| "grad_norm": 0.2528667151927948, | |
| "learning_rate": 1.3665649372668702e-05, | |
| "loss": 0.104, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 59.5116988809766, | |
| "grad_norm": 0.3110768795013428, | |
| "learning_rate": 1.34961003730078e-05, | |
| "loss": 0.1009, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_loss": 0.30486321449279785, | |
| "eval_mae": 0.3213992714881897, | |
| "eval_r2": 0.7690669298171997, | |
| "eval_rmse": 0.5521438121795654, | |
| "eval_runtime": 22.881, | |
| "eval_samples_per_second": 437.043, | |
| "eval_steps_per_second": 3.453, | |
| "step": 58980 | |
| }, | |
| { | |
| "epoch": 60.02034587995931, | |
| "grad_norm": 0.23977969586849213, | |
| "learning_rate": 1.3326551373346898e-05, | |
| "loss": 0.1014, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 60.52899287894201, | |
| "grad_norm": 0.28639206290245056, | |
| "learning_rate": 1.3157002373685996e-05, | |
| "loss": 0.0985, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 61.0, | |
| "eval_loss": 0.30517521500587463, | |
| "eval_mae": 0.3211789131164551, | |
| "eval_r2": 0.7687903642654419, | |
| "eval_rmse": 0.552426278591156, | |
| "eval_runtime": 22.942, | |
| "eval_samples_per_second": 435.882, | |
| "eval_steps_per_second": 3.443, | |
| "step": 59963 | |
| }, | |
| { | |
| "epoch": 61.03763987792472, | |
| "grad_norm": 0.25792455673217773, | |
| "learning_rate": 1.2987453374025093e-05, | |
| "loss": 0.1033, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 61.54628687690742, | |
| "grad_norm": 0.2544417977333069, | |
| "learning_rate": 1.281790437436419e-05, | |
| "loss": 0.0986, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_loss": 0.3043474555015564, | |
| "eval_mae": 0.320587694644928, | |
| "eval_r2": 0.7671276330947876, | |
| "eval_rmse": 0.5516765713691711, | |
| "eval_runtime": 22.599, | |
| "eval_samples_per_second": 442.498, | |
| "eval_steps_per_second": 3.496, | |
| "step": 60946 | |
| }, | |
| { | |
| "epoch": 62.054933875890136, | |
| "grad_norm": 0.23652108013629913, | |
| "learning_rate": 1.2648355374703289e-05, | |
| "loss": 0.1016, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 62.56358087487284, | |
| "grad_norm": 0.2460223287343979, | |
| "learning_rate": 1.2478806375042387e-05, | |
| "loss": 0.0974, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 63.0, | |
| "eval_loss": 0.3064461946487427, | |
| "eval_mae": 0.3217301368713379, | |
| "eval_r2": 0.7658741474151611, | |
| "eval_rmse": 0.5535755157470703, | |
| "eval_runtime": 21.5823, | |
| "eval_samples_per_second": 463.343, | |
| "eval_steps_per_second": 3.66, | |
| "step": 61929 | |
| }, | |
| { | |
| "epoch": 63.072227873855546, | |
| "grad_norm": 0.263189435005188, | |
| "learning_rate": 1.2309257375381485e-05, | |
| "loss": 0.102, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 63.58087487283825, | |
| "grad_norm": 0.28210920095443726, | |
| "learning_rate": 1.2139708375720583e-05, | |
| "loss": 0.1002, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_loss": 0.30489081144332886, | |
| "eval_mae": 0.3209025263786316, | |
| "eval_r2": 0.7646889686584473, | |
| "eval_rmse": 0.5521687865257263, | |
| "eval_runtime": 22.5558, | |
| "eval_samples_per_second": 443.346, | |
| "eval_steps_per_second": 3.502, | |
| "step": 62912 | |
| }, | |
| { | |
| "epoch": 64.08952187182095, | |
| "grad_norm": 0.28177881240844727, | |
| "learning_rate": 1.1970159376059683e-05, | |
| "loss": 0.1, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 64.59816887080366, | |
| "grad_norm": 0.2723771035671234, | |
| "learning_rate": 1.180061037639878e-05, | |
| "loss": 0.0984, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "eval_loss": 0.3055484890937805, | |
| "eval_mae": 0.3205299973487854, | |
| "eval_r2": 0.7690364718437195, | |
| "eval_rmse": 0.5527639985084534, | |
| "eval_runtime": 23.8261, | |
| "eval_samples_per_second": 419.707, | |
| "eval_steps_per_second": 3.316, | |
| "step": 63895 | |
| }, | |
| { | |
| "epoch": 65.10681586978637, | |
| "grad_norm": 0.2683132588863373, | |
| "learning_rate": 1.1631061376737877e-05, | |
| "loss": 0.0996, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 65.61546286876907, | |
| "grad_norm": 0.2997967004776001, | |
| "learning_rate": 1.1461512377076976e-05, | |
| "loss": 0.0985, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "eval_loss": 0.30802544951438904, | |
| "eval_mae": 0.3224177062511444, | |
| "eval_r2": 0.7694234848022461, | |
| "eval_rmse": 0.5550000071525574, | |
| "eval_runtime": 22.5937, | |
| "eval_samples_per_second": 442.602, | |
| "eval_steps_per_second": 3.497, | |
| "step": 64878 | |
| }, | |
| { | |
| "epoch": 66.12410986775178, | |
| "grad_norm": 0.2792266309261322, | |
| "learning_rate": 1.1291963377416074e-05, | |
| "loss": 0.1004, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 66.63275686673448, | |
| "grad_norm": 0.2717229425907135, | |
| "learning_rate": 1.1122414377755172e-05, | |
| "loss": 0.0987, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 67.0, | |
| "eval_loss": 0.3065015971660614, | |
| "eval_mae": 0.3211498260498047, | |
| "eval_r2": 0.76569664478302, | |
| "eval_rmse": 0.5536254644393921, | |
| "eval_runtime": 21.705, | |
| "eval_samples_per_second": 460.722, | |
| "eval_steps_per_second": 3.64, | |
| "step": 65861 | |
| }, | |
| { | |
| "epoch": 67.1414038657172, | |
| "grad_norm": 0.2361566722393036, | |
| "learning_rate": 1.095286537809427e-05, | |
| "loss": 0.0983, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 67.65005086469989, | |
| "grad_norm": 0.2546670436859131, | |
| "learning_rate": 1.0783316378433368e-05, | |
| "loss": 0.0977, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_loss": 0.30562883615493774, | |
| "eval_mae": 0.32023167610168457, | |
| "eval_r2": 0.7649646997451782, | |
| "eval_rmse": 0.5528367161750793, | |
| "eval_runtime": 20.8457, | |
| "eval_samples_per_second": 479.716, | |
| "eval_steps_per_second": 3.79, | |
| "step": 66844 | |
| }, | |
| { | |
| "epoch": 68.1586978636826, | |
| "grad_norm": 0.24419383704662323, | |
| "learning_rate": 1.0613767378772464e-05, | |
| "loss": 0.0982, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 68.66734486266532, | |
| "grad_norm": 0.27351826429367065, | |
| "learning_rate": 1.0444218379111562e-05, | |
| "loss": 0.0986, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 69.0, | |
| "eval_loss": 0.3047651946544647, | |
| "eval_mae": 0.32021564245224, | |
| "eval_r2": 0.7680791020393372, | |
| "eval_rmse": 0.5520549416542053, | |
| "eval_runtime": 21.4713, | |
| "eval_samples_per_second": 465.738, | |
| "eval_steps_per_second": 3.679, | |
| "step": 67827 | |
| }, | |
| { | |
| "epoch": 69.17599186164801, | |
| "grad_norm": 0.2680034339427948, | |
| "learning_rate": 1.027466937945066e-05, | |
| "loss": 0.0986, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 69.68463886063073, | |
| "grad_norm": 0.2814819812774658, | |
| "learning_rate": 1.010512037978976e-05, | |
| "loss": 0.0984, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_loss": 0.307124525308609, | |
| "eval_mae": 0.3204113841056824, | |
| "eval_r2": 0.7679520845413208, | |
| "eval_rmse": 0.5541877150535583, | |
| "eval_runtime": 22.1583, | |
| "eval_samples_per_second": 451.298, | |
| "eval_steps_per_second": 3.565, | |
| "step": 68810 | |
| }, | |
| { | |
| "epoch": 70.19328585961343, | |
| "grad_norm": 0.27025556564331055, | |
| "learning_rate": 9.935571380128858e-06, | |
| "loss": 0.0964, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 70.70193285859614, | |
| "grad_norm": 0.2899576723575592, | |
| "learning_rate": 9.766022380467957e-06, | |
| "loss": 0.0966, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 71.0, | |
| "eval_loss": 0.3074840307235718, | |
| "eval_mae": 0.3207957148551941, | |
| "eval_r2": 0.7695941925048828, | |
| "eval_rmse": 0.5545119643211365, | |
| "eval_runtime": 23.0374, | |
| "eval_samples_per_second": 434.077, | |
| "eval_steps_per_second": 3.429, | |
| "step": 69793 | |
| }, | |
| { | |
| "epoch": 71.21057985757884, | |
| "grad_norm": 0.2534237205982208, | |
| "learning_rate": 9.596473380807055e-06, | |
| "loss": 0.0985, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 71.71922685656155, | |
| "grad_norm": 0.2650779187679291, | |
| "learning_rate": 9.426924381146151e-06, | |
| "loss": 0.0978, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_loss": 0.3064354956150055, | |
| "eval_mae": 0.3201001286506653, | |
| "eval_r2": 0.7672066688537598, | |
| "eval_rmse": 0.5535657405853271, | |
| "eval_runtime": 23.7676, | |
| "eval_samples_per_second": 420.74, | |
| "eval_steps_per_second": 3.324, | |
| "step": 70776 | |
| }, | |
| { | |
| "epoch": 72.22787385554425, | |
| "grad_norm": 0.2299046665430069, | |
| "learning_rate": 9.257375381485249e-06, | |
| "loss": 0.0966, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 72.73652085452696, | |
| "grad_norm": 0.25810402631759644, | |
| "learning_rate": 9.087826381824347e-06, | |
| "loss": 0.0962, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 73.0, | |
| "eval_loss": 0.3067697286605835, | |
| "eval_mae": 0.32059645652770996, | |
| "eval_r2": 0.770045280456543, | |
| "eval_rmse": 0.553867518901825, | |
| "eval_runtime": 23.7289, | |
| "eval_samples_per_second": 421.427, | |
| "eval_steps_per_second": 3.329, | |
| "step": 71759 | |
| }, | |
| { | |
| "epoch": 73.24516785350967, | |
| "grad_norm": 0.2593117356300354, | |
| "learning_rate": 8.918277382163445e-06, | |
| "loss": 0.097, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 73.75381485249237, | |
| "grad_norm": 0.28874120116233826, | |
| "learning_rate": 8.748728382502543e-06, | |
| "loss": 0.0967, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 74.0, | |
| "eval_loss": 0.30635932087898254, | |
| "eval_mae": 0.31995031237602234, | |
| "eval_r2": 0.7648576498031616, | |
| "eval_rmse": 0.5534969568252563, | |
| "eval_runtime": 22.8055, | |
| "eval_samples_per_second": 438.49, | |
| "eval_steps_per_second": 3.464, | |
| "step": 72742 | |
| }, | |
| { | |
| "epoch": 74.26246185147508, | |
| "grad_norm": 0.2876887917518616, | |
| "learning_rate": 8.579179382841641e-06, | |
| "loss": 0.0955, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 74.77110885045778, | |
| "grad_norm": 0.2901298701763153, | |
| "learning_rate": 8.409630383180738e-06, | |
| "loss": 0.0982, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "eval_loss": 0.3065277338027954, | |
| "eval_mae": 0.3198009133338928, | |
| "eval_r2": 0.7686657905578613, | |
| "eval_rmse": 0.5536490678787231, | |
| "eval_runtime": 21.8575, | |
| "eval_samples_per_second": 457.509, | |
| "eval_steps_per_second": 3.614, | |
| "step": 73725 | |
| }, | |
| { | |
| "epoch": 75.27975584944049, | |
| "grad_norm": 0.23049291968345642, | |
| "learning_rate": 8.240081383519838e-06, | |
| "loss": 0.096, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 75.78840284842319, | |
| "grad_norm": 0.2709825932979584, | |
| "learning_rate": 8.070532383858936e-06, | |
| "loss": 0.0954, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_loss": 0.30666035413742065, | |
| "eval_mae": 0.32015907764434814, | |
| "eval_r2": 0.7675079107284546, | |
| "eval_rmse": 0.5537688136100769, | |
| "eval_runtime": 23.6719, | |
| "eval_samples_per_second": 422.441, | |
| "eval_steps_per_second": 3.337, | |
| "step": 74708 | |
| }, | |
| { | |
| "epoch": 76.2970498474059, | |
| "grad_norm": 0.21834221482276917, | |
| "learning_rate": 7.900983384198034e-06, | |
| "loss": 0.0954, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 76.8056968463886, | |
| "grad_norm": 0.20948007702827454, | |
| "learning_rate": 7.731434384537132e-06, | |
| "loss": 0.097, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 77.0, | |
| "eval_loss": 0.30686530470848083, | |
| "eval_mae": 0.3204137682914734, | |
| "eval_r2": 0.7677739858627319, | |
| "eval_rmse": 0.5539537668228149, | |
| "eval_runtime": 21.4205, | |
| "eval_samples_per_second": 466.843, | |
| "eval_steps_per_second": 3.688, | |
| "step": 75691 | |
| }, | |
| { | |
| "epoch": 77.31434384537131, | |
| "grad_norm": 0.23167894780635834, | |
| "learning_rate": 7.56188538487623e-06, | |
| "loss": 0.0932, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 77.82299084435402, | |
| "grad_norm": 0.24640928208827972, | |
| "learning_rate": 7.392336385215327e-06, | |
| "loss": 0.0977, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 78.0, | |
| "eval_loss": 0.3072910010814667, | |
| "eval_mae": 0.32037079334259033, | |
| "eval_r2": 0.7697309255599976, | |
| "eval_rmse": 0.5543379187583923, | |
| "eval_runtime": 19.5509, | |
| "eval_samples_per_second": 511.487, | |
| "eval_steps_per_second": 4.041, | |
| "step": 76674 | |
| }, | |
| { | |
| "epoch": 78.33163784333672, | |
| "grad_norm": 0.2685850262641907, | |
| "learning_rate": 7.222787385554425e-06, | |
| "loss": 0.0942, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 78.84028484231943, | |
| "grad_norm": 0.2667245864868164, | |
| "learning_rate": 7.0532383858935235e-06, | |
| "loss": 0.0965, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 79.0, | |
| "eval_loss": 0.30803820490837097, | |
| "eval_mae": 0.32020050287246704, | |
| "eval_r2": 0.7687854170799255, | |
| "eval_rmse": 0.555011510848999, | |
| "eval_runtime": 23.1481, | |
| "eval_samples_per_second": 432.0, | |
| "eval_steps_per_second": 3.413, | |
| "step": 77657 | |
| }, | |
| { | |
| "epoch": 79.34893184130213, | |
| "grad_norm": 0.20989225804805756, | |
| "learning_rate": 6.883689386232621e-06, | |
| "loss": 0.0959, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 79.85757884028484, | |
| "grad_norm": 0.2710280418395996, | |
| "learning_rate": 6.71414038657172e-06, | |
| "loss": 0.0945, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_loss": 0.30728211998939514, | |
| "eval_mae": 0.3196803033351898, | |
| "eval_r2": 0.7670853137969971, | |
| "eval_rmse": 0.5543298125267029, | |
| "eval_runtime": 22.6974, | |
| "eval_samples_per_second": 440.579, | |
| "eval_steps_per_second": 3.481, | |
| "step": 78640 | |
| }, | |
| { | |
| "epoch": 80.36622583926754, | |
| "grad_norm": 0.23559938371181488, | |
| "learning_rate": 6.544591386910818e-06, | |
| "loss": 0.0933, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 80.87487283825025, | |
| "grad_norm": 0.2247001826763153, | |
| "learning_rate": 6.375042387249915e-06, | |
| "loss": 0.0955, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 81.0, | |
| "eval_loss": 0.3074042499065399, | |
| "eval_mae": 0.31953954696655273, | |
| "eval_r2": 0.7664982676506042, | |
| "eval_rmse": 0.5544400215148926, | |
| "eval_runtime": 21.5373, | |
| "eval_samples_per_second": 464.311, | |
| "eval_steps_per_second": 3.668, | |
| "step": 79623 | |
| }, | |
| { | |
| "epoch": 81.38351983723297, | |
| "grad_norm": 0.23154723644256592, | |
| "learning_rate": 6.205493387589013e-06, | |
| "loss": 0.0928, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 81.89216683621567, | |
| "grad_norm": 0.2460862100124359, | |
| "learning_rate": 6.035944387928111e-06, | |
| "loss": 0.0957, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 82.0, | |
| "eval_loss": 0.30816981196403503, | |
| "eval_mae": 0.31992191076278687, | |
| "eval_r2": 0.7665845155715942, | |
| "eval_rmse": 0.5551300644874573, | |
| "eval_runtime": 20.0656, | |
| "eval_samples_per_second": 498.366, | |
| "eval_steps_per_second": 3.937, | |
| "step": 80606 | |
| }, | |
| { | |
| "epoch": 82.40081383519838, | |
| "grad_norm": 0.22507379949092865, | |
| "learning_rate": 5.866395388267209e-06, | |
| "loss": 0.0937, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 82.90946083418108, | |
| "grad_norm": 0.26125577092170715, | |
| "learning_rate": 5.696846388606307e-06, | |
| "loss": 0.095, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 83.0, | |
| "eval_loss": 0.3074418008327484, | |
| "eval_mae": 0.31970179080963135, | |
| "eval_r2": 0.7669422626495361, | |
| "eval_rmse": 0.5544738173484802, | |
| "eval_runtime": 23.0334, | |
| "eval_samples_per_second": 434.153, | |
| "eval_steps_per_second": 3.43, | |
| "step": 81589 | |
| }, | |
| { | |
| "epoch": 83.41810783316379, | |
| "grad_norm": 0.2263702154159546, | |
| "learning_rate": 5.5272973889454055e-06, | |
| "loss": 0.0938, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 83.92675483214649, | |
| "grad_norm": 0.2645528018474579, | |
| "learning_rate": 5.357748389284504e-06, | |
| "loss": 0.094, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_loss": 0.30633336305618286, | |
| "eval_mae": 0.3188663721084595, | |
| "eval_r2": 0.7666647434234619, | |
| "eval_rmse": 0.5534735321998596, | |
| "eval_runtime": 23.049, | |
| "eval_samples_per_second": 433.859, | |
| "eval_steps_per_second": 3.427, | |
| "step": 82572 | |
| }, | |
| { | |
| "epoch": 84.4354018311292, | |
| "grad_norm": 0.21599704027175903, | |
| "learning_rate": 5.188199389623601e-06, | |
| "loss": 0.0943, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 84.9440488301119, | |
| "grad_norm": 0.21557337045669556, | |
| "learning_rate": 5.018650389962699e-06, | |
| "loss": 0.0932, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "eval_loss": 0.30662301182746887, | |
| "eval_mae": 0.3191204369068146, | |
| "eval_r2": 0.7680181264877319, | |
| "eval_rmse": 0.5537351369857788, | |
| "eval_runtime": 22.4327, | |
| "eval_samples_per_second": 445.777, | |
| "eval_steps_per_second": 3.522, | |
| "step": 83555 | |
| }, | |
| { | |
| "epoch": 85.45269582909461, | |
| "grad_norm": 0.2520500123500824, | |
| "learning_rate": 4.849101390301798e-06, | |
| "loss": 0.0936, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 85.96134282807732, | |
| "grad_norm": 0.24406184256076813, | |
| "learning_rate": 4.679552390640896e-06, | |
| "loss": 0.0934, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "eval_loss": 0.3075953722000122, | |
| "eval_mae": 0.31922364234924316, | |
| "eval_r2": 0.7682535648345947, | |
| "eval_rmse": 0.554612398147583, | |
| "eval_runtime": 24.2058, | |
| "eval_samples_per_second": 413.124, | |
| "eval_steps_per_second": 3.264, | |
| "step": 84538 | |
| }, | |
| { | |
| "epoch": 86.46998982706002, | |
| "grad_norm": 0.29262858629226685, | |
| "learning_rate": 4.510003390979993e-06, | |
| "loss": 0.0925, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 86.97863682604273, | |
| "grad_norm": 0.23719151318073273, | |
| "learning_rate": 4.340454391319091e-06, | |
| "loss": 0.0946, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 87.0, | |
| "eval_loss": 0.30780571699142456, | |
| "eval_mae": 0.3192029595375061, | |
| "eval_r2": 0.767227292060852, | |
| "eval_rmse": 0.5548020005226135, | |
| "eval_runtime": 21.1225, | |
| "eval_samples_per_second": 473.43, | |
| "eval_steps_per_second": 3.74, | |
| "step": 85521 | |
| }, | |
| { | |
| "epoch": 87.48728382502543, | |
| "grad_norm": 0.2712896168231964, | |
| "learning_rate": 4.170905391658189e-06, | |
| "loss": 0.0922, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 87.99593082400814, | |
| "grad_norm": 0.22555504739284515, | |
| "learning_rate": 4.001356391997287e-06, | |
| "loss": 0.0939, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_loss": 0.3082311153411865, | |
| "eval_mae": 0.3195423483848572, | |
| "eval_r2": 0.7683557271957397, | |
| "eval_rmse": 0.5551851987838745, | |
| "eval_runtime": 21.258, | |
| "eval_samples_per_second": 470.412, | |
| "eval_steps_per_second": 3.716, | |
| "step": 86504 | |
| }, | |
| { | |
| "epoch": 88.50457782299084, | |
| "grad_norm": 0.25731492042541504, | |
| "learning_rate": 3.831807392336386e-06, | |
| "loss": 0.0917, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 89.0, | |
| "eval_loss": 0.30769726634025574, | |
| "eval_mae": 0.3188689947128296, | |
| "eval_r2": 0.7664185166358948, | |
| "eval_rmse": 0.5547041893005371, | |
| "eval_runtime": 22.8134, | |
| "eval_samples_per_second": 438.338, | |
| "eval_steps_per_second": 3.463, | |
| "step": 87487 | |
| }, | |
| { | |
| "epoch": 89.01322482197355, | |
| "grad_norm": 0.21790558099746704, | |
| "learning_rate": 3.6622583926754837e-06, | |
| "loss": 0.0933, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 89.52187182095625, | |
| "grad_norm": 0.20840144157409668, | |
| "learning_rate": 3.4927093930145814e-06, | |
| "loss": 0.0913, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "eval_loss": 0.3072252869606018, | |
| "eval_mae": 0.31897908449172974, | |
| "eval_r2": 0.7661517858505249, | |
| "eval_rmse": 0.5542786717414856, | |
| "eval_runtime": 22.7075, | |
| "eval_samples_per_second": 440.383, | |
| "eval_steps_per_second": 3.479, | |
| "step": 88470 | |
| }, | |
| { | |
| "epoch": 90.03051881993896, | |
| "grad_norm": 0.23747815191745758, | |
| "learning_rate": 3.323160393353679e-06, | |
| "loss": 0.0941, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 90.53916581892167, | |
| "grad_norm": 0.2666161358356476, | |
| "learning_rate": 3.1536113936927776e-06, | |
| "loss": 0.0913, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 91.0, | |
| "eval_loss": 0.30819734930992126, | |
| "eval_mae": 0.3193185329437256, | |
| "eval_r2": 0.7667792439460754, | |
| "eval_rmse": 0.5551548600196838, | |
| "eval_runtime": 23.0912, | |
| "eval_samples_per_second": 433.065, | |
| "eval_steps_per_second": 3.421, | |
| "step": 89453 | |
| }, | |
| { | |
| "epoch": 91.04781281790437, | |
| "grad_norm": 0.2510707676410675, | |
| "learning_rate": 2.9840623940318752e-06, | |
| "loss": 0.0932, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 91.55645981688708, | |
| "grad_norm": 0.2492397129535675, | |
| "learning_rate": 2.8145133943709733e-06, | |
| "loss": 0.0915, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_loss": 0.3087303936481476, | |
| "eval_mae": 0.3195701241493225, | |
| "eval_r2": 0.767082929611206, | |
| "eval_rmse": 0.5556347370147705, | |
| "eval_runtime": 21.4213, | |
| "eval_samples_per_second": 466.825, | |
| "eval_steps_per_second": 3.688, | |
| "step": 90436 | |
| }, | |
| { | |
| "epoch": 92.06510681586978, | |
| "grad_norm": 0.2138824164867401, | |
| "learning_rate": 2.6449643947100714e-06, | |
| "loss": 0.0921, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 92.5737538148525, | |
| "grad_norm": 0.2821875810623169, | |
| "learning_rate": 2.475415395049169e-06, | |
| "loss": 0.091, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 93.0, | |
| "eval_loss": 0.30839282274246216, | |
| "eval_mae": 0.319232702255249, | |
| "eval_r2": 0.7667563557624817, | |
| "eval_rmse": 0.5553308725357056, | |
| "eval_runtime": 21.2783, | |
| "eval_samples_per_second": 469.961, | |
| "eval_steps_per_second": 3.713, | |
| "step": 91419 | |
| }, | |
| { | |
| "epoch": 93.08240081383519, | |
| "grad_norm": 0.2118423730134964, | |
| "learning_rate": 2.305866395388267e-06, | |
| "loss": 0.0929, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 93.5910478128179, | |
| "grad_norm": 0.22796212136745453, | |
| "learning_rate": 2.1363173957273653e-06, | |
| "loss": 0.0913, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 94.0, | |
| "eval_loss": 0.3086921274662018, | |
| "eval_mae": 0.3193605840206146, | |
| "eval_r2": 0.766891598701477, | |
| "eval_rmse": 0.5556001663208008, | |
| "eval_runtime": 23.1152, | |
| "eval_samples_per_second": 432.616, | |
| "eval_steps_per_second": 3.418, | |
| "step": 92402 | |
| }, | |
| { | |
| "epoch": 94.09969481180062, | |
| "grad_norm": 0.2285338044166565, | |
| "learning_rate": 1.9667683960664634e-06, | |
| "loss": 0.0917, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 94.60834181078332, | |
| "grad_norm": 0.20946064591407776, | |
| "learning_rate": 1.7972193964055613e-06, | |
| "loss": 0.0911, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 95.0, | |
| "eval_loss": 0.3085222542285919, | |
| "eval_mae": 0.3191923499107361, | |
| "eval_r2": 0.7661963701248169, | |
| "eval_rmse": 0.5554474592208862, | |
| "eval_runtime": 23.4962, | |
| "eval_samples_per_second": 425.601, | |
| "eval_steps_per_second": 3.362, | |
| "step": 93385 | |
| }, | |
| { | |
| "epoch": 95.11698880976603, | |
| "grad_norm": 0.269754558801651, | |
| "learning_rate": 1.6276703967446594e-06, | |
| "loss": 0.0923, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 95.62563580874873, | |
| "grad_norm": 0.2517547309398651, | |
| "learning_rate": 1.4581213970837572e-06, | |
| "loss": 0.0912, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_loss": 0.3087967038154602, | |
| "eval_mae": 0.3190614581108093, | |
| "eval_r2": 0.7664559483528137, | |
| "eval_rmse": 0.5556943416595459, | |
| "eval_runtime": 21.3044, | |
| "eval_samples_per_second": 469.387, | |
| "eval_steps_per_second": 3.708, | |
| "step": 94368 | |
| }, | |
| { | |
| "epoch": 96.13428280773144, | |
| "grad_norm": 0.200937882065773, | |
| "learning_rate": 1.2885723974228553e-06, | |
| "loss": 0.091, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 96.64292980671414, | |
| "grad_norm": 0.2715360224246979, | |
| "learning_rate": 1.1190233977619532e-06, | |
| "loss": 0.0904, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 97.0, | |
| "eval_loss": 0.30910882353782654, | |
| "eval_mae": 0.3192349076271057, | |
| "eval_r2": 0.7662383317947388, | |
| "eval_rmse": 0.5559751987457275, | |
| "eval_runtime": 21.1316, | |
| "eval_samples_per_second": 473.224, | |
| "eval_steps_per_second": 3.738, | |
| "step": 95351 | |
| }, | |
| { | |
| "epoch": 97.15157680569685, | |
| "grad_norm": 0.24004267156124115, | |
| "learning_rate": 9.494743981010512e-07, | |
| "loss": 0.09, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 97.66022380467955, | |
| "grad_norm": 0.28319135308265686, | |
| "learning_rate": 7.799253984401492e-07, | |
| "loss": 0.0914, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 98.0, | |
| "eval_loss": 0.3087212145328522, | |
| "eval_mae": 0.3190605640411377, | |
| "eval_r2": 0.7665444016456604, | |
| "eval_rmse": 0.5556263327598572, | |
| "eval_runtime": 18.6312, | |
| "eval_samples_per_second": 536.734, | |
| "eval_steps_per_second": 4.24, | |
| "step": 96334 | |
| }, | |
| { | |
| "epoch": 98.16887080366226, | |
| "grad_norm": 0.3092040419578552, | |
| "learning_rate": 6.103763987792473e-07, | |
| "loss": 0.0906, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 98.67751780264497, | |
| "grad_norm": 0.27111703157424927, | |
| "learning_rate": 4.408273991183452e-07, | |
| "loss": 0.0894, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 99.0, | |
| "eval_loss": 0.3089219331741333, | |
| "eval_mae": 0.3191257417201996, | |
| "eval_r2": 0.7667442560195923, | |
| "eval_rmse": 0.5558070540428162, | |
| "eval_runtime": 18.7902, | |
| "eval_samples_per_second": 532.191, | |
| "eval_steps_per_second": 4.204, | |
| "step": 97317 | |
| }, | |
| { | |
| "epoch": 99.18616480162767, | |
| "grad_norm": 0.2768039405345917, | |
| "learning_rate": 2.712783994574432e-07, | |
| "loss": 0.0919, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 99.69481180061038, | |
| "grad_norm": 0.19111211597919464, | |
| "learning_rate": 1.0172939979654121e-07, | |
| "loss": 0.0896, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_loss": 0.30905383825302124, | |
| "eval_mae": 0.3191269636154175, | |
| "eval_r2": 0.7666448354721069, | |
| "eval_rmse": 0.5559256076812744, | |
| "eval_runtime": 18.8185, | |
| "eval_samples_per_second": 531.391, | |
| "eval_steps_per_second": 4.198, | |
| "step": 98300 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 98300, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.3115044487280154e+17, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |