| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 4942, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.020234722784297856, |
| "grad_norm": 8.72037124633789, |
| "learning_rate": 1.25e-06, |
| "loss": 1.209, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04046944556859571, |
| "grad_norm": 5.450883865356445, |
| "learning_rate": 2.5e-06, |
| "loss": 0.9725, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.060704168352893564, |
| "grad_norm": 9.1498441696167, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.6988, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.08093889113719142, |
| "grad_norm": 7.334131717681885, |
| "learning_rate": 5e-06, |
| "loss": 0.5294, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.10117361392148927, |
| "grad_norm": 9.698792457580566, |
| "learning_rate": 4.9472796288485875e-06, |
| "loss": 0.4225, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.12140833670578713, |
| "grad_norm": 4.219217777252197, |
| "learning_rate": 4.894559257697175e-06, |
| "loss": 0.3813, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.141643059490085, |
| "grad_norm": 12.876835823059082, |
| "learning_rate": 4.841838886545762e-06, |
| "loss": 0.333, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.16187778227438285, |
| "grad_norm": 8.669962882995605, |
| "learning_rate": 4.789118515394349e-06, |
| "loss": 0.3039, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1821125050586807, |
| "grad_norm": 13.13197135925293, |
| "learning_rate": 4.736398144242936e-06, |
| "loss": 0.3216, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.20234722784297854, |
| "grad_norm": 10.586642265319824, |
| "learning_rate": 4.683677773091523e-06, |
| "loss": 0.2824, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.20234722784297854, |
| "eval_loss": 0.2994783818721771, |
| "eval_runtime": 99.3532, |
| "eval_samples_per_second": 41.851, |
| "eval_steps_per_second": 0.654, |
| "eval_tag_accuracy": 0.9016354016354017, |
| "eval_tag_f1": 0.9666317639529918, |
| "eval_tag_f1-CONTINUE": 0.9075288265882885, |
| "eval_tag_f1-FINISH": 0.9075288265882885, |
| "eval_tag_f1-TERMINATE": 0.8949396352427433, |
| "eval_token_accuracy": 0.9666317639529918, |
| "eval_token_f1": 0.9016354016354017, |
| "eval_token_f1_negative": 0.9075288265882885, |
| "eval_token_f1_positive": 0.8949396352427433, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2225819506272764, |
| "grad_norm": 5.3532562255859375, |
| "learning_rate": 4.63095740194011e-06, |
| "loss": 0.2726, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.24281667341157426, |
| "grad_norm": 12.720158576965332, |
| "learning_rate": 4.578237030788697e-06, |
| "loss": 0.255, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2630513961958721, |
| "grad_norm": 6.4184088706970215, |
| "learning_rate": 4.525516659637284e-06, |
| "loss": 0.254, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.28328611898017, |
| "grad_norm": 7.8461151123046875, |
| "learning_rate": 4.4727962884858715e-06, |
| "loss": 0.2647, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.3035208417644678, |
| "grad_norm": 3.1113088130950928, |
| "learning_rate": 4.420075917334458e-06, |
| "loss": 0.2442, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3237555645487657, |
| "grad_norm": 8.432883262634277, |
| "learning_rate": 4.367355546183045e-06, |
| "loss": 0.247, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3439902873330635, |
| "grad_norm": 4.828508377075195, |
| "learning_rate": 4.314635175031632e-06, |
| "loss": 0.2359, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.3642250101173614, |
| "grad_norm": 4.320587635040283, |
| "learning_rate": 4.26191480388022e-06, |
| "loss": 0.2549, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.38445973290165925, |
| "grad_norm": 8.660967826843262, |
| "learning_rate": 4.209194432728807e-06, |
| "loss": 0.2453, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.4046944556859571, |
| "grad_norm": 6.111636161804199, |
| "learning_rate": 4.156474061577394e-06, |
| "loss": 0.2447, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4046944556859571, |
| "eval_loss": 0.26576703786849976, |
| "eval_runtime": 89.7698, |
| "eval_samples_per_second": 46.318, |
| "eval_steps_per_second": 0.724, |
| "eval_tag_accuracy": 0.9086099086099086, |
| "eval_tag_f1": 0.9704089737670849, |
| "eval_tag_f1-CONTINUE": 0.9146067415730337, |
| "eval_tag_f1-FINISH": 0.9146067415730337, |
| "eval_tag_f1-TERMINATE": 0.9017071908949819, |
| "eval_token_accuracy": 0.9704089737670849, |
| "eval_token_f1": 0.9086099086099086, |
| "eval_token_f1_negative": 0.9146067415730337, |
| "eval_token_f1_positive": 0.9017071908949819, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.42492917847025496, |
| "grad_norm": 6.170385837554932, |
| "learning_rate": 4.103753690425981e-06, |
| "loss": 0.2341, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4451639012545528, |
| "grad_norm": 9.265089988708496, |
| "learning_rate": 4.051033319274568e-06, |
| "loss": 0.2335, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.4653986240388507, |
| "grad_norm": 8.301984786987305, |
| "learning_rate": 3.9983129481231555e-06, |
| "loss": 0.2176, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.4856333468231485, |
| "grad_norm": 2.7331886291503906, |
| "learning_rate": 3.945592576971743e-06, |
| "loss": 0.2537, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5058680696074463, |
| "grad_norm": 3.475696325302124, |
| "learning_rate": 3.89287220582033e-06, |
| "loss": 0.2384, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.5261027923917442, |
| "grad_norm": 10.670371055603027, |
| "learning_rate": 3.840151834668917e-06, |
| "loss": 0.2076, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5463375151760421, |
| "grad_norm": 7.286683559417725, |
| "learning_rate": 3.7874314635175035e-06, |
| "loss": 0.2209, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.56657223796034, |
| "grad_norm": 5.6653876304626465, |
| "learning_rate": 3.7347110923660906e-06, |
| "loss": 0.2277, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5868069607446378, |
| "grad_norm": 8.548470497131348, |
| "learning_rate": 3.6819907212146777e-06, |
| "loss": 0.2178, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.6070416835289356, |
| "grad_norm": 1.2667831182479858, |
| "learning_rate": 3.629270350063265e-06, |
| "loss": 0.2116, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6070416835289356, |
| "eval_loss": 0.21864494681358337, |
| "eval_runtime": 87.99, |
| "eval_samples_per_second": 47.255, |
| "eval_steps_per_second": 0.739, |
| "eval_tag_accuracy": 0.9336219336219336, |
| "eval_tag_f1": 0.9717647246654861, |
| "eval_tag_f1-CONTINUE": 0.9345351043643264, |
| "eval_tag_f1-FINISH": 0.9345351043643264, |
| "eval_tag_f1-TERMINATE": 0.9326829268292683, |
| "eval_token_accuracy": 0.9717647246654861, |
| "eval_token_f1": 0.9336219336219336, |
| "eval_token_f1_negative": 0.9345351043643264, |
| "eval_token_f1_positive": 0.9326829268292683, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6272764063132336, |
| "grad_norm": 12.82320785522461, |
| "learning_rate": 3.576549978911852e-06, |
| "loss": 0.2244, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.6475111290975314, |
| "grad_norm": 5.0135955810546875, |
| "learning_rate": 3.523829607760439e-06, |
| "loss": 0.1991, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.6677458518818292, |
| "grad_norm": 5.661144733428955, |
| "learning_rate": 3.471109236609026e-06, |
| "loss": 0.2301, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.687980574666127, |
| "grad_norm": 4.150299549102783, |
| "learning_rate": 3.4183888654576133e-06, |
| "loss": 0.215, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.7082152974504249, |
| "grad_norm": 1.3773038387298584, |
| "learning_rate": 3.3656684943062e-06, |
| "loss": 0.2415, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.7284500202347228, |
| "grad_norm": 7.56788969039917, |
| "learning_rate": 3.312948123154787e-06, |
| "loss": 0.215, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.7486847430190207, |
| "grad_norm": 9.647685050964355, |
| "learning_rate": 3.260227752003374e-06, |
| "loss": 0.2187, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.7689194658033185, |
| "grad_norm": 3.352022409439087, |
| "learning_rate": 3.2075073808519613e-06, |
| "loss": 0.232, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7891541885876163, |
| "grad_norm": 6.138416290283203, |
| "learning_rate": 3.1547870097005484e-06, |
| "loss": 0.2067, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.8093889113719142, |
| "grad_norm": 5.341176509857178, |
| "learning_rate": 3.1020666385491355e-06, |
| "loss": 0.2068, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8093889113719142, |
| "eval_loss": 0.202407106757164, |
| "eval_runtime": 99.5295, |
| "eval_samples_per_second": 41.777, |
| "eval_steps_per_second": 0.653, |
| "eval_tag_accuracy": 0.9377104377104377, |
| "eval_tag_f1": 0.973316054937847, |
| "eval_tag_f1-CONTINUE": 0.93849441937782, |
| "eval_tag_f1-FINISH": 0.93849441937782, |
| "eval_tag_f1-TERMINATE": 0.9369062119366626, |
| "eval_token_accuracy": 0.973316054937847, |
| "eval_token_f1": 0.9377104377104377, |
| "eval_token_f1_negative": 0.93849441937782, |
| "eval_token_f1_positive": 0.9369062119366626, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8296236341562121, |
| "grad_norm": 3.7065298557281494, |
| "learning_rate": 3.0493462673977226e-06, |
| "loss": 0.2186, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.8498583569405099, |
| "grad_norm": 11.173847198486328, |
| "learning_rate": 2.9966258962463097e-06, |
| "loss": 0.2298, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.8700930797248078, |
| "grad_norm": 7.461451053619385, |
| "learning_rate": 2.943905525094897e-06, |
| "loss": 0.2171, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.8903278025091056, |
| "grad_norm": 5.774527549743652, |
| "learning_rate": 2.891185153943484e-06, |
| "loss": 0.2008, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.9105625252934035, |
| "grad_norm": 13.355608940124512, |
| "learning_rate": 2.8384647827920706e-06, |
| "loss": 0.1928, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.9307972480777014, |
| "grad_norm": 5.512803554534912, |
| "learning_rate": 2.7857444116406586e-06, |
| "loss": 0.2151, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.9510319708619992, |
| "grad_norm": 7.355721473693848, |
| "learning_rate": 2.7330240404892457e-06, |
| "loss": 0.2313, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.971266693646297, |
| "grad_norm": 8.145475387573242, |
| "learning_rate": 2.680303669337833e-06, |
| "loss": 0.206, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.9915014164305949, |
| "grad_norm": 3.35742449760437, |
| "learning_rate": 2.6275832981864195e-06, |
| "loss": 0.2126, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.0117361392148927, |
| "grad_norm": 4.693106651306152, |
| "learning_rate": 2.5748629270350066e-06, |
| "loss": 0.1958, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.0117361392148927, |
| "eval_loss": 0.20388753712177277, |
| "eval_runtime": 90.2103, |
| "eval_samples_per_second": 46.092, |
| "eval_steps_per_second": 0.721, |
| "eval_tag_accuracy": 0.9321789321789322, |
| "eval_tag_f1": 0.9744642522149032, |
| "eval_tag_f1-CONTINUE": 0.9338338808071328, |
| "eval_tag_f1-FINISH": 0.9338338808071328, |
| "eval_tag_f1-TERMINATE": 0.9304390725209669, |
| "eval_token_accuracy": 0.9744642522149032, |
| "eval_token_f1": 0.9321789321789322, |
| "eval_token_f1_negative": 0.9338338808071328, |
| "eval_token_f1_positive": 0.9304390725209669, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.0319708619991905, |
| "grad_norm": 4.066263675689697, |
| "learning_rate": 2.5221425558835937e-06, |
| "loss": 0.1705, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.0522055847834886, |
| "grad_norm": 1.981619954109192, |
| "learning_rate": 2.4694221847321804e-06, |
| "loss": 0.2019, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.0724403075677864, |
| "grad_norm": 17.264829635620117, |
| "learning_rate": 2.416701813580768e-06, |
| "loss": 0.1502, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.0926750303520842, |
| "grad_norm": 8.771439552307129, |
| "learning_rate": 2.363981442429355e-06, |
| "loss": 0.1552, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.112909753136382, |
| "grad_norm": 11.144837379455566, |
| "learning_rate": 2.311261071277942e-06, |
| "loss": 0.1537, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.13314447592068, |
| "grad_norm": 3.879704475402832, |
| "learning_rate": 2.2585407001265293e-06, |
| "loss": 0.1816, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.1533791987049777, |
| "grad_norm": 4.202072620391846, |
| "learning_rate": 2.2058203289751164e-06, |
| "loss": 0.1522, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.1736139214892756, |
| "grad_norm": 19.130678176879883, |
| "learning_rate": 2.1530999578237035e-06, |
| "loss": 0.1646, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.1938486442735734, |
| "grad_norm": 1.3073982000350952, |
| "learning_rate": 2.10037958667229e-06, |
| "loss": 0.1594, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.2140833670578712, |
| "grad_norm": 6.3615851402282715, |
| "learning_rate": 2.0476592155208773e-06, |
| "loss": 0.1616, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.2140833670578712, |
| "eval_loss": 0.21922506392002106, |
| "eval_runtime": 90.428, |
| "eval_samples_per_second": 45.981, |
| "eval_steps_per_second": 0.719, |
| "eval_tag_accuracy": 0.9391534391534392, |
| "eval_tag_f1": 0.974827471052257, |
| "eval_tag_f1-CONTINUE": 0.9403161122906346, |
| "eval_tag_f1-FINISH": 0.9403161122906346, |
| "eval_tag_f1-TERMINATE": 0.9379445670836399, |
| "eval_token_accuracy": 0.974827471052257, |
| "eval_token_f1": 0.9391534391534392, |
| "eval_token_f1_negative": 0.9403161122906346, |
| "eval_token_f1_positive": 0.9379445670836399, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.2343180898421693, |
| "grad_norm": 5.983785152435303, |
| "learning_rate": 1.9949388443694644e-06, |
| "loss": 0.1884, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.254552812626467, |
| "grad_norm": 7.194103717803955, |
| "learning_rate": 1.9422184732180515e-06, |
| "loss": 0.1682, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.274787535410765, |
| "grad_norm": 8.178751945495605, |
| "learning_rate": 1.8894981020666386e-06, |
| "loss": 0.1621, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.2950222581950628, |
| "grad_norm": 9.569229125976562, |
| "learning_rate": 1.8367777309152257e-06, |
| "loss": 0.162, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.3152569809793606, |
| "grad_norm": 10.708954811096191, |
| "learning_rate": 1.7840573597638128e-06, |
| "loss": 0.1437, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.3354917037636584, |
| "grad_norm": 8.722265243530273, |
| "learning_rate": 1.7313369886124e-06, |
| "loss": 0.163, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.3557264265479563, |
| "grad_norm": 1.2432576417922974, |
| "learning_rate": 1.6786166174609872e-06, |
| "loss": 0.1529, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.375961149332254, |
| "grad_norm": 8.486040115356445, |
| "learning_rate": 1.6258962463095744e-06, |
| "loss": 0.1509, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.396195872116552, |
| "grad_norm": 7.521843910217285, |
| "learning_rate": 1.5731758751581612e-06, |
| "loss": 0.176, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.41643059490085, |
| "grad_norm": 5.913136959075928, |
| "learning_rate": 1.5204555040067484e-06, |
| "loss": 0.1635, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.41643059490085, |
| "eval_loss": 0.21361203491687775, |
| "eval_runtime": 99.2858, |
| "eval_samples_per_second": 41.879, |
| "eval_steps_per_second": 0.655, |
| "eval_tag_accuracy": 0.9389129389129389, |
| "eval_tag_f1": 0.9748354538838472, |
| "eval_tag_f1-CONTINUE": 0.9399243140964996, |
| "eval_tag_f1-FINISH": 0.9399243140964996, |
| "eval_tag_f1-TERMINATE": 0.937866927592955, |
| "eval_token_accuracy": 0.9748354538838472, |
| "eval_token_f1": 0.9389129389129389, |
| "eval_token_f1_negative": 0.9399243140964996, |
| "eval_token_f1_positive": 0.937866927592955, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.4366653176851476, |
| "grad_norm": 3.9816882610321045, |
| "learning_rate": 1.4677351328553355e-06, |
| "loss": 0.1598, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.4569000404694457, |
| "grad_norm": 11.924986839294434, |
| "learning_rate": 1.4150147617039226e-06, |
| "loss": 0.1722, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.4771347632537435, |
| "grad_norm": 4.420969009399414, |
| "learning_rate": 1.3622943905525097e-06, |
| "loss": 0.1408, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.4973694860380413, |
| "grad_norm": 11.450356483459473, |
| "learning_rate": 1.3095740194010966e-06, |
| "loss": 0.167, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.5176042088223392, |
| "grad_norm": 8.506190299987793, |
| "learning_rate": 1.2568536482496837e-06, |
| "loss": 0.1547, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.537838931606637, |
| "grad_norm": 2.8181309700012207, |
| "learning_rate": 1.2041332770982708e-06, |
| "loss": 0.1425, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.5580736543909348, |
| "grad_norm": 2.5935609340667725, |
| "learning_rate": 1.151412905946858e-06, |
| "loss": 0.1547, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.5783083771752326, |
| "grad_norm": 10.59027099609375, |
| "learning_rate": 1.098692534795445e-06, |
| "loss": 0.1495, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.5985430999595307, |
| "grad_norm": 2.5813493728637695, |
| "learning_rate": 1.0459721636440321e-06, |
| "loss": 0.1305, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.6187778227438283, |
| "grad_norm": 1.844016671180725, |
| "learning_rate": 9.932517924926192e-07, |
| "loss": 0.1566, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.6187778227438283, |
| "eval_loss": 0.22482524812221527, |
| "eval_runtime": 87.0812, |
| "eval_samples_per_second": 47.749, |
| "eval_steps_per_second": 0.746, |
| "eval_tag_accuracy": 0.9377104377104377, |
| "eval_tag_f1": 0.9749232650313393, |
| "eval_tag_f1-CONTINUE": 0.9394718392147698, |
| "eval_tag_f1-FINISH": 0.9394718392147698, |
| "eval_tag_f1-TERMINATE": 0.9358434481050285, |
| "eval_token_accuracy": 0.9749232650313393, |
| "eval_token_f1": 0.9377104377104377, |
| "eval_token_f1_negative": 0.9394718392147698, |
| "eval_token_f1_positive": 0.9358434481050285, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.6390125455281264, |
| "grad_norm": 9.678681373596191, |
| "learning_rate": 9.405314213412063e-07, |
| "loss": 0.1682, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.659247268312424, |
| "grad_norm": 11.279572486877441, |
| "learning_rate": 8.878110501897934e-07, |
| "loss": 0.1655, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.679481991096722, |
| "grad_norm": 6.408283710479736, |
| "learning_rate": 8.350906790383805e-07, |
| "loss": 0.1375, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.6997167138810199, |
| "grad_norm": 8.360751152038574, |
| "learning_rate": 7.823703078869676e-07, |
| "loss": 0.148, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.7199514366653177, |
| "grad_norm": 9.093498229980469, |
| "learning_rate": 7.296499367355547e-07, |
| "loss": 0.1511, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.7401861594496155, |
| "grad_norm": 8.47696304321289, |
| "learning_rate": 6.769295655841418e-07, |
| "loss": 0.183, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.7604208822339134, |
| "grad_norm": 5.258668422698975, |
| "learning_rate": 6.242091944327289e-07, |
| "loss": 0.1583, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.7806556050182114, |
| "grad_norm": 2.8200266361236572, |
| "learning_rate": 5.714888232813159e-07, |
| "loss": 0.1392, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.800890327802509, |
| "grad_norm": 1.5096231698989868, |
| "learning_rate": 5.18768452129903e-07, |
| "loss": 0.1252, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.821125050586807, |
| "grad_norm": 7.513113975524902, |
| "learning_rate": 4.660480809784902e-07, |
| "loss": 0.165, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.821125050586807, |
| "eval_loss": 0.21320512890815735, |
| "eval_runtime": 90.0868, |
| "eval_samples_per_second": 46.156, |
| "eval_steps_per_second": 0.722, |
| "eval_tag_accuracy": 0.9403559403559404, |
| "eval_tag_f1": 0.9753503465214146, |
| "eval_tag_f1-CONTINUE": 0.9416470588235294, |
| "eval_tag_f1-FINISH": 0.9416470588235294, |
| "eval_tag_f1-TERMINATE": 0.9390063944909002, |
| "eval_token_accuracy": 0.9753503465214146, |
| "eval_token_f1": 0.9403559403559404, |
| "eval_token_f1_negative": 0.9416470588235294, |
| "eval_token_f1_positive": 0.9390063944909002, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.8413597733711047, |
| "grad_norm": 3.344792604446411, |
| "learning_rate": 4.1332770982707723e-07, |
| "loss": 0.1645, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.8615944961554027, |
| "grad_norm": 1.2586418390274048, |
| "learning_rate": 3.606073386756643e-07, |
| "loss": 0.1497, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.8818292189397006, |
| "grad_norm": 5.412999629974365, |
| "learning_rate": 3.078869675242514e-07, |
| "loss": 0.1586, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.9020639417239984, |
| "grad_norm": 4.021740436553955, |
| "learning_rate": 2.551665963728385e-07, |
| "loss": 0.1804, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.9222986645082962, |
| "grad_norm": 5.421517372131348, |
| "learning_rate": 2.0244622522142556e-07, |
| "loss": 0.1426, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.942533387292594, |
| "grad_norm": 13.003674507141113, |
| "learning_rate": 1.4972585407001267e-07, |
| "loss": 0.1798, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.962768110076892, |
| "grad_norm": 2.8676435947418213, |
| "learning_rate": 9.700548291859976e-08, |
| "loss": 0.1675, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.9830028328611897, |
| "grad_norm": 5.923835277557373, |
| "learning_rate": 4.4285111767186845e-08, |
| "loss": 0.1406, |
| "step": 4900 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 4942, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "total_flos": 1.105108458176471e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|