| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.999912914743534, | |
| "global_step": 31578, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.9419279907084783e-05, | |
| "loss": 0.0381, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.883855981416957e-05, | |
| "loss": 0.0241, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.825783972125435e-05, | |
| "loss": 0.022, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.767711962833915e-05, | |
| "loss": 0.0207, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.709639953542393e-05, | |
| "loss": 0.0199, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9726582157682948, | |
| "eval_f1_HIRIQ": 0.7791055764551668, | |
| "eval_f1_HOLAM": 0.6713693875265927, | |
| "eval_f1_KUBUTZ": 0.7119160348309771, | |
| "eval_f1_PATACH": 0.851339663936741, | |
| "eval_f1_SHADDA": 0.8292565235143462, | |
| "eval_f1_SHVA": 0.8089031685917613, | |
| "eval_f1_TSERE": 0.7485289696344972, | |
| "eval_loss": 0.013756499625742435, | |
| "eval_macro_f1": 0.7714884749271546, | |
| "eval_micro_f1": 0.8119978846741942, | |
| "eval_runtime": 51.0176, | |
| "eval_samples_per_second": 750.232, | |
| "eval_steps_per_second": 7.507, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.6515679442508716e-05, | |
| "loss": 0.0189, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.59349593495935e-05, | |
| "loss": 0.0177, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 4.5354239256678285e-05, | |
| "loss": 0.0174, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 4.4773519163763066e-05, | |
| "loss": 0.0172, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 4.4192799070847854e-05, | |
| "loss": 0.0167, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 4.361207897793264e-05, | |
| "loss": 0.0166, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9761089815978242, | |
| "eval_f1_HIRIQ": 0.8106474124231328, | |
| "eval_f1_HOLAM": 0.7086283185840708, | |
| "eval_f1_KUBUTZ": 0.7472128945601075, | |
| "eval_f1_PATACH": 0.8701410253039187, | |
| "eval_f1_SHADDA": 0.8522099185169709, | |
| "eval_f1_SHVA": 0.8329891346774823, | |
| "eval_f1_TSERE": 0.7836281031964952, | |
| "eval_loss": 0.012357393279671669, | |
| "eval_macro_f1": 0.8007795438945969, | |
| "eval_micro_f1": 0.8363289062525998, | |
| "eval_runtime": 52.8127, | |
| "eval_samples_per_second": 724.731, | |
| "eval_steps_per_second": 7.252, | |
| "step": 5741 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 4.303135888501742e-05, | |
| "loss": 0.0157, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 4.245063879210221e-05, | |
| "loss": 0.0151, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 4.186991869918699e-05, | |
| "loss": 0.015, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 4.128919860627178e-05, | |
| "loss": 0.0148, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 4.070847851335656e-05, | |
| "loss": 0.0148, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 4.012775842044135e-05, | |
| "loss": 0.0147, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9780662605569959, | |
| "eval_f1_HIRIQ": 0.8294876571557304, | |
| "eval_f1_HOLAM": 0.7245056654076871, | |
| "eval_f1_KUBUTZ": 0.7711184824965379, | |
| "eval_f1_PATACH": 0.8810396563640023, | |
| "eval_f1_SHADDA": 0.8664105216491798, | |
| "eval_f1_SHVA": 0.8441380510188548, | |
| "eval_f1_TSERE": 0.7970168612191959, | |
| "eval_loss": 0.011536195874214172, | |
| "eval_macro_f1": 0.8162452707587411, | |
| "eval_micro_f1": 0.8498705967528521, | |
| "eval_runtime": 54.0777, | |
| "eval_samples_per_second": 707.778, | |
| "eval_steps_per_second": 7.082, | |
| "step": 8612 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 3.9547038327526136e-05, | |
| "loss": 0.0137, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 3.8966318234610924e-05, | |
| "loss": 0.0134, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 3.8385598141695705e-05, | |
| "loss": 0.0135, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 3.780487804878049e-05, | |
| "loss": 0.0135, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 3.7224157955865274e-05, | |
| "loss": 0.0132, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9792273312868799, | |
| "eval_f1_HIRIQ": 0.8373507327062703, | |
| "eval_f1_HOLAM": 0.7442917433178227, | |
| "eval_f1_KUBUTZ": 0.7827533265097235, | |
| "eval_f1_PATACH": 0.8867928661237257, | |
| "eval_f1_SHADDA": 0.8725204092940786, | |
| "eval_f1_SHVA": 0.8508812530764879, | |
| "eval_f1_TSERE": 0.801735428945645, | |
| "eval_loss": 0.010950990952551365, | |
| "eval_macro_f1": 0.8251893942819649, | |
| "eval_micro_f1": 0.8567964095958783, | |
| "eval_runtime": 52.4162, | |
| "eval_samples_per_second": 730.213, | |
| "eval_steps_per_second": 7.307, | |
| "step": 11483 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 3.664343786295006e-05, | |
| "loss": 0.0131, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 3.606271777003484e-05, | |
| "loss": 0.0122, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 3.548199767711963e-05, | |
| "loss": 0.0122, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 3.490127758420442e-05, | |
| "loss": 0.0123, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 3.43205574912892e-05, | |
| "loss": 0.0122, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 3.373983739837399e-05, | |
| "loss": 0.0122, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9799653269288884, | |
| "eval_f1_HIRIQ": 0.8439961236895427, | |
| "eval_f1_HOLAM": 0.7499999999999999, | |
| "eval_f1_KUBUTZ": 0.7890396727485228, | |
| "eval_f1_PATACH": 0.8902453557194496, | |
| "eval_f1_SHADDA": 0.8791103582869978, | |
| "eval_f1_SHVA": 0.8568982423275623, | |
| "eval_f1_TSERE": 0.8086938522045125, | |
| "eval_loss": 0.010681645944714546, | |
| "eval_macro_f1": 0.8311405149966554, | |
| "eval_micro_f1": 0.8620291101643662, | |
| "eval_runtime": 49.3354, | |
| "eval_samples_per_second": 775.812, | |
| "eval_steps_per_second": 7.763, | |
| "step": 14353 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 3.315911730545877e-05, | |
| "loss": 0.012, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 3.2578397212543556e-05, | |
| "loss": 0.011, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 3.199767711962834e-05, | |
| "loss": 0.0114, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 3.1416957026713125e-05, | |
| "loss": 0.0112, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 3.083623693379791e-05, | |
| "loss": 0.0112, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 3.0255516840882698e-05, | |
| "loss": 0.0112, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9805248675902215, | |
| "eval_f1_HIRIQ": 0.8487574675668987, | |
| "eval_f1_HOLAM": 0.7595126122274476, | |
| "eval_f1_KUBUTZ": 0.797991035951556, | |
| "eval_f1_PATACH": 0.8928041018387551, | |
| "eval_f1_SHADDA": 0.8846275811897255, | |
| "eval_f1_SHVA": 0.8607482938928253, | |
| "eval_f1_TSERE": 0.8122762990085161, | |
| "eval_loss": 0.010463288053870201, | |
| "eval_macro_f1": 0.8366739130965322, | |
| "eval_micro_f1": 0.8659091900525827, | |
| "eval_runtime": 49.2152, | |
| "eval_samples_per_second": 777.707, | |
| "eval_steps_per_second": 7.782, | |
| "step": 17224 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 2.9674796747967482e-05, | |
| "loss": 0.0107, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 2.9094076655052267e-05, | |
| "loss": 0.0105, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 2.851335656213705e-05, | |
| "loss": 0.0103, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 2.7932636469221835e-05, | |
| "loss": 0.0106, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 2.735191637630662e-05, | |
| "loss": 0.0105, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "learning_rate": 2.6771196283391408e-05, | |
| "loss": 0.0104, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9811321234870294, | |
| "eval_f1_HIRIQ": 0.8550038223948401, | |
| "eval_f1_HOLAM": 0.763222830168437, | |
| "eval_f1_KUBUTZ": 0.8037383177570094, | |
| "eval_f1_PATACH": 0.8963010344751529, | |
| "eval_f1_SHADDA": 0.8854757929883139, | |
| "eval_f1_SHVA": 0.8649184871804224, | |
| "eval_f1_TSERE": 0.8171559407964675, | |
| "eval_loss": 0.010495145805180073, | |
| "eval_macro_f1": 0.8408308893943776, | |
| "eval_micro_f1": 0.8700826091477137, | |
| "eval_runtime": 52.1218, | |
| "eval_samples_per_second": 734.337, | |
| "eval_steps_per_second": 7.348, | |
| "step": 20095 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 2.6190476190476192e-05, | |
| "loss": 0.0098, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 2.5609756097560977e-05, | |
| "loss": 0.0097, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 2.502903600464576e-05, | |
| "loss": 0.0097, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 2.4448315911730546e-05, | |
| "loss": 0.0098, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "learning_rate": 2.3867595818815333e-05, | |
| "loss": 0.0097, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.981536112480715, | |
| "eval_f1_HIRIQ": 0.8594682202461781, | |
| "eval_f1_HOLAM": 0.7635878626208735, | |
| "eval_f1_KUBUTZ": 0.8044461430664374, | |
| "eval_f1_PATACH": 0.8982397971622214, | |
| "eval_f1_SHADDA": 0.888189855746859, | |
| "eval_f1_SHVA": 0.8671149394978173, | |
| "eval_f1_TSERE": 0.8201351021404737, | |
| "eval_loss": 0.010434958152472973, | |
| "eval_macro_f1": 0.8430259886401229, | |
| "eval_micro_f1": 0.8725397588417411, | |
| "eval_runtime": 50.6576, | |
| "eval_samples_per_second": 755.562, | |
| "eval_steps_per_second": 7.561, | |
| "step": 22966 | |
| }, | |
| { | |
| "epoch": 8.01, | |
| "learning_rate": 2.3286875725900118e-05, | |
| "loss": 0.0096, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 8.19, | |
| "learning_rate": 2.2706155632984902e-05, | |
| "loss": 0.009, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "learning_rate": 2.2125435540069687e-05, | |
| "loss": 0.0091, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 8.53, | |
| "learning_rate": 2.1544715447154475e-05, | |
| "loss": 0.0091, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 8.71, | |
| "learning_rate": 2.096399535423926e-05, | |
| "loss": 0.0092, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "learning_rate": 2.038327526132404e-05, | |
| "loss": 0.0091, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.9816429946081784, | |
| "eval_f1_HIRIQ": 0.8617756604590732, | |
| "eval_f1_HOLAM": 0.7760372565622354, | |
| "eval_f1_KUBUTZ": 0.8087252634284247, | |
| "eval_f1_PATACH": 0.8981484778351434, | |
| "eval_f1_SHADDA": 0.8896060289509028, | |
| "eval_f1_SHVA": 0.8676933023775238, | |
| "eval_f1_TSERE": 0.8195844976309076, | |
| "eval_loss": 0.010549969039857388, | |
| "eval_macro_f1": 0.8459386410348871, | |
| "eval_micro_f1": 0.8734815959940515, | |
| "eval_runtime": 50.289, | |
| "eval_samples_per_second": 761.101, | |
| "eval_steps_per_second": 7.616, | |
| "step": 25836 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "learning_rate": 1.9802555168408828e-05, | |
| "loss": 0.0088, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 9.23, | |
| "learning_rate": 1.9221835075493612e-05, | |
| "loss": 0.0085, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 9.41, | |
| "learning_rate": 1.8641114982578397e-05, | |
| "loss": 0.0086, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "learning_rate": 1.806039488966318e-05, | |
| "loss": 0.0085, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 9.75, | |
| "learning_rate": 1.747967479674797e-05, | |
| "loss": 0.0086, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 9.93, | |
| "learning_rate": 1.6898954703832754e-05, | |
| "loss": 0.0086, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.9820323509296518, | |
| "eval_f1_HIRIQ": 0.8644184686296934, | |
| "eval_f1_HOLAM": 0.7807301793043854, | |
| "eval_f1_KUBUTZ": 0.8107445035070613, | |
| "eval_f1_PATACH": 0.9007614706453949, | |
| "eval_f1_SHADDA": 0.8914874936954775, | |
| "eval_f1_SHVA": 0.8700223746362294, | |
| "eval_f1_TSERE": 0.8237858288440186, | |
| "eval_loss": 0.010514745488762856, | |
| "eval_macro_f1": 0.8488500456088944, | |
| "eval_micro_f1": 0.876108330623615, | |
| "eval_runtime": 51.4328, | |
| "eval_samples_per_second": 744.175, | |
| "eval_steps_per_second": 7.447, | |
| "step": 28707 | |
| }, | |
| { | |
| "epoch": 10.1, | |
| "learning_rate": 1.6318234610917538e-05, | |
| "loss": 0.0083, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "learning_rate": 1.5737514518002326e-05, | |
| "loss": 0.008, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 10.45, | |
| "learning_rate": 1.5156794425087109e-05, | |
| "loss": 0.0082, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 10.62, | |
| "learning_rate": 1.4576074332171893e-05, | |
| "loss": 0.0081, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "learning_rate": 1.3995354239256678e-05, | |
| "loss": 0.0081, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 10.97, | |
| "learning_rate": 1.3414634146341466e-05, | |
| "loss": 0.0081, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.9822652012787683, | |
| "eval_f1_HIRIQ": 0.8650168041552093, | |
| "eval_f1_HOLAM": 0.7809975223526877, | |
| "eval_f1_KUBUTZ": 0.8156180840973759, | |
| "eval_f1_PATACH": 0.9017915187001648, | |
| "eval_f1_SHADDA": 0.893456266013353, | |
| "eval_f1_SHVA": 0.8708873379860418, | |
| "eval_f1_TSERE": 0.824460194494808, | |
| "eval_loss": 0.010741644538939, | |
| "eval_macro_f1": 0.85031824682852, | |
| "eval_micro_f1": 0.8772879261954959, | |
| "eval_runtime": 49.6631, | |
| "eval_samples_per_second": 770.693, | |
| "eval_steps_per_second": 7.712, | |
| "step": 31578 | |
| } | |
| ], | |
| "max_steps": 43050, | |
| "num_train_epochs": 15, | |
| "total_flos": 1.540842233529006e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |