{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.874015748031496, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "eval_accuracy": 0.8157711500522223, "eval_f1": 0.5942329873125721, "eval_loss": 0.4848455488681793, "eval_precision": 0.5368903709879116, "eval_recall": 0.6652892561983471, "eval_runtime": 2.2498, "eval_samples_per_second": 41.337, "eval_steps_per_second": 1.333, "step": 20 }, { "epoch": 0.31, "eval_accuracy": 0.8389520714865962, "eval_f1": 0.7553421368547419, "eval_loss": 0.39182421565055847, "eval_precision": 0.705697622252131, "eval_recall": 0.8125, "eval_runtime": 2.9557, "eval_samples_per_second": 31.465, "eval_steps_per_second": 1.015, "step": 40 }, { "epoch": 0.47, "eval_accuracy": 0.8849947777648833, "eval_f1": 0.8393364928909953, "eval_loss": 0.34172168374061584, "eval_precision": 0.7753940455341506, "eval_recall": 0.9147727272727273, "eval_runtime": 2.9806, "eval_samples_per_second": 31.202, "eval_steps_per_second": 1.007, "step": 60 }, { "epoch": 0.63, "eval_accuracy": 0.8782929093652083, "eval_f1": 0.878610002347969, "eval_loss": 0.3457355201244354, "eval_precision": 0.8054240206629358, "eval_recall": 0.9664256198347108, "eval_runtime": 3.288, "eval_samples_per_second": 28.285, "eval_steps_per_second": 0.912, "step": 80 }, { "epoch": 0.79, "eval_accuracy": 0.9183880700940003, "eval_f1": 0.8993830090175606, "eval_loss": 0.26329365372657776, "eval_precision": 0.8318700614574188, "eval_recall": 0.9788223140495868, "eval_runtime": 3.262, "eval_samples_per_second": 28.51, "eval_steps_per_second": 0.92, "step": 100 }, { "epoch": 0.94, "eval_accuracy": 0.9218695601717535, "eval_f1": 0.908175497482618, "eval_loss": 0.27159208059310913, "eval_precision": 0.847427293064877, "eval_recall": 0.9783057851239669, "eval_runtime": 2.2237, "eval_samples_per_second": 41.822, "eval_steps_per_second": 1.349, "step": 120 }, { "epoch": 1.1, "eval_accuracy": 0.931965881397238, "eval_f1": 0.8820210939416238, "eval_loss": 0.20828573405742645, "eval_precision": 0.8397944885567492, "eval_recall": 0.9287190082644629, "eval_runtime": 2.2685, "eval_samples_per_second": 40.996, "eval_steps_per_second": 1.322, "step": 140 }, { "epoch": 1.26, "eval_accuracy": 0.893669490541952, "eval_f1": 0.8734673859735164, "eval_loss": 0.33937641978263855, "eval_precision": 0.8314659197012139, "eval_recall": 0.9199380165289256, "eval_runtime": 3.2652, "eval_samples_per_second": 28.482, "eval_steps_per_second": 0.919, "step": 160 }, { "epoch": 1.42, "eval_accuracy": 0.883079958222119, "eval_f1": 0.8970068347867075, "eval_loss": 0.35709869861602783, "eval_precision": 0.8248807975726051, "eval_recall": 0.9829545454545454, "eval_runtime": 3.2871, "eval_samples_per_second": 28.292, "eval_steps_per_second": 0.913, "step": 180 }, { "epoch": 1.57, "eval_accuracy": 0.9112800278519206, "eval_f1": 0.8886283704572099, "eval_loss": 0.2868669033050537, "eval_precision": 0.8136539287247746, "eval_recall": 0.9788223140495868, "eval_runtime": 2.9919, "eval_samples_per_second": 31.084, "eval_steps_per_second": 1.003, "step": 200 }, { "epoch": 1.73, "eval_accuracy": 0.9393350353951492, "eval_f1": 0.9247259439707675, "eval_loss": 0.201907679438591, "eval_precision": 0.8750576302443522, "eval_recall": 0.9803719008264463, "eval_runtime": 2.2839, "eval_samples_per_second": 40.72, "eval_steps_per_second": 1.314, "step": 220 }, { "epoch": 1.89, "eval_accuracy": 0.9525937101079262, "eval_f1": 0.9215399610136452, "eval_loss": 0.1594531089067459, "eval_precision": 0.8722324723247232, "eval_recall": 0.9767561983471075, "eval_runtime": 2.9584, "eval_samples_per_second": 31.436, "eval_steps_per_second": 1.014, "step": 240 }, { "epoch": 2.05, "eval_accuracy": 0.9378263896947894, "eval_f1": 0.9166058837831268, "eval_loss": 0.22776451706886292, "eval_precision": 0.8658704639412035, "eval_recall": 0.9736570247933884, "eval_runtime": 3.2621, "eval_samples_per_second": 28.509, "eval_steps_per_second": 0.92, "step": 260 }, { "epoch": 2.2, "eval_accuracy": 0.9292677265869792, "eval_f1": 0.8771048002010556, "eval_loss": 0.23985013365745544, "eval_precision": 0.8541360744003916, "eval_recall": 0.9013429752066116, "eval_runtime": 2.2706, "eval_samples_per_second": 40.959, "eval_steps_per_second": 1.321, "step": 280 }, { "epoch": 2.36, "eval_accuracy": 0.9322850179876987, "eval_f1": 0.9097725568607848, "eval_loss": 0.2839757800102234, "eval_precision": 0.8813559322033898, "eval_recall": 0.9400826446280992, "eval_runtime": 3.2602, "eval_samples_per_second": 28.525, "eval_steps_per_second": 0.92, "step": 300 }, { "epoch": 2.52, "eval_accuracy": 0.9572356968782639, "eval_f1": 0.9368919930157147, "eval_loss": 0.15871362388134003, "eval_precision": 0.9059334298118669, "eval_recall": 0.9700413223140496, "eval_runtime": 2.2505, "eval_samples_per_second": 41.325, "eval_steps_per_second": 1.333, "step": 320 }, { "epoch": 2.68, "eval_accuracy": 0.9558431008471626, "eval_f1": 0.9427135678391959, "eval_loss": 0.16202741861343384, "eval_precision": 0.9178082191780822, "eval_recall": 0.96900826446281, "eval_runtime": 3.2378, "eval_samples_per_second": 28.723, "eval_steps_per_second": 0.927, "step": 340 }, { "epoch": 2.83, "eval_accuracy": 0.9299059997679007, "eval_f1": 0.9343610144206862, "eval_loss": 0.27064642310142517, "eval_precision": 0.9007670182166826, "eval_recall": 0.9705578512396694, "eval_runtime": 3.2832, "eval_samples_per_second": 28.326, "eval_steps_per_second": 0.914, "step": 360 }, { "epoch": 2.99, "eval_accuracy": 0.9477486364163862, "eval_f1": 0.9423980222496909, "eval_loss": 0.18907134234905243, "eval_precision": 0.9037458511142722, "eval_recall": 0.984504132231405, "eval_runtime": 3.2921, "eval_samples_per_second": 28.25, "eval_steps_per_second": 0.911, "step": 380 }, { "epoch": 3.15, "eval_accuracy": 0.9234072182894278, "eval_f1": 0.9175810155651952, "eval_loss": 0.26186808943748474, "eval_precision": 0.9067070095814422, "eval_recall": 0.9287190082644629, "eval_runtime": 3.296, "eval_samples_per_second": 28.216, "eval_steps_per_second": 0.91, "step": 400 }, { "epoch": 3.31, "eval_accuracy": 0.9601949634443542, "eval_f1": 0.9384654342871613, "eval_loss": 0.1693572849035263, "eval_precision": 0.9205166418281172, "eval_recall": 0.9571280991735537, "eval_runtime": 2.2486, "eval_samples_per_second": 41.36, "eval_steps_per_second": 1.334, "step": 420 }, { "epoch": 3.46, "eval_accuracy": 0.9532900081234769, "eval_f1": 0.9351432880844646, "eval_loss": 0.20067894458770752, "eval_precision": 0.910871694417238, "eval_recall": 0.9607438016528925, "eval_runtime": 3.2966, "eval_samples_per_second": 28.211, "eval_steps_per_second": 0.91, "step": 440 }, { "epoch": 3.62, "eval_accuracy": 0.9529998839503308, "eval_f1": 0.9408805031446541, "eval_loss": 0.2007509469985962, "eval_precision": 0.9171162334477685, "eval_recall": 0.9659090909090909, "eval_runtime": 2.9788, "eval_samples_per_second": 31.22, "eval_steps_per_second": 1.007, "step": 460 }, { "epoch": 3.78, "eval_accuracy": 0.9438609724962284, "eval_f1": 0.9448345035105317, "eval_loss": 0.2780458927154541, "eval_precision": 0.9181286549707602, "eval_recall": 0.9731404958677686, "eval_runtime": 2.2763, "eval_samples_per_second": 40.855, "eval_steps_per_second": 1.318, "step": 480 }, { "epoch": 3.94, "learning_rate": 3.5e-05, "loss": 0.2747, "step": 500 }, { "epoch": 3.94, "eval_accuracy": 0.9434838110711384, "eval_f1": 0.9407256004087889, "eval_loss": 0.22288289666175842, "eval_precision": 0.9307381193124368, "eval_recall": 0.9509297520661157, "eval_runtime": 2.2152, "eval_samples_per_second": 41.983, "eval_steps_per_second": 1.354, "step": 500 }, { "epoch": 4.09, "eval_accuracy": 0.9413659046071718, "eval_f1": 0.9442211055276382, "eval_loss": 0.2373497188091278, "eval_precision": 0.9192759295499021, "eval_recall": 0.9705578512396694, "eval_runtime": 2.286, "eval_samples_per_second": 40.682, "eval_steps_per_second": 1.312, "step": 520 }, { "epoch": 4.25, "eval_accuracy": 0.9290066148311478, "eval_f1": 0.9232756393696718, "eval_loss": 0.32156404852867126, "eval_precision": 0.9235142118863049, "eval_recall": 0.9230371900826446, "eval_runtime": 3.2781, "eval_samples_per_second": 28.371, "eval_steps_per_second": 0.915, "step": 540 }, { "epoch": 4.41, "eval_accuracy": 0.9621387954044331, "eval_f1": 0.9501378100726634, "eval_loss": 0.1727248579263687, "eval_precision": 0.9226277372262773, "eval_recall": 0.9793388429752066, "eval_runtime": 3.2891, "eval_samples_per_second": 28.275, "eval_steps_per_second": 0.912, "step": 560 }, { "epoch": 4.57, "eval_accuracy": 0.9612974353023094, "eval_f1": 0.9563451776649746, "eval_loss": 0.2031358927488327, "eval_precision": 0.9401197604790419, "eval_recall": 0.9731404958677686, "eval_runtime": 2.9761, "eval_samples_per_second": 31.249, "eval_steps_per_second": 1.008, "step": 580 }, { "epoch": 4.72, "eval_accuracy": 0.9507079029824765, "eval_f1": 0.9494897959183674, "eval_loss": 0.24843396246433258, "eval_precision": 0.9380040322580645, "eval_recall": 0.9612603305785123, "eval_runtime": 2.242, "eval_samples_per_second": 41.481, "eval_steps_per_second": 1.338, "step": 600 }, { "epoch": 4.88, "eval_accuracy": 0.9590054543344552, "eval_f1": 0.9607390300230947, "eval_loss": 0.2189687043428421, "eval_precision": 0.9546149923508415, "eval_recall": 0.9669421487603306, "eval_runtime": 3.2795, "eval_samples_per_second": 28.358, "eval_steps_per_second": 0.915, "step": 620 }, { "epoch": 5.04, "eval_accuracy": 0.9571486596263201, "eval_f1": 0.9600409836065573, "eval_loss": 0.25752872228622437, "eval_precision": 0.9522357723577236, "eval_recall": 0.9679752066115702, "eval_runtime": 2.2527, "eval_samples_per_second": 41.284, "eval_steps_per_second": 1.332, "step": 640 }, { "epoch": 5.2, "eval_accuracy": 0.9608622490425902, "eval_f1": 0.9593869731800767, "eval_loss": 0.24718773365020752, "eval_precision": 0.9489641232945932, "eval_recall": 0.9700413223140496, "eval_runtime": 2.2796, "eval_samples_per_second": 40.796, "eval_steps_per_second": 1.316, "step": 660 }, { "epoch": 5.35, "eval_accuracy": 0.9522165486828362, "eval_f1": 0.9561671763506625, "eval_loss": 0.2798936367034912, "eval_precision": 0.9436619718309859, "eval_recall": 0.96900826446281, "eval_runtime": 3.2922, "eval_samples_per_second": 28.249, "eval_steps_per_second": 0.911, "step": 680 }, { "epoch": 5.51, "eval_accuracy": 0.9556400139259603, "eval_f1": 0.9535588086824837, "eval_loss": 0.26087334752082825, "eval_precision": 0.9323790720631787, "eval_recall": 0.9757231404958677, "eval_runtime": 2.2499, "eval_samples_per_second": 41.335, "eval_steps_per_second": 1.333, "step": 700 }, { "epoch": 5.67, "eval_accuracy": 0.9529418591157015, "eval_f1": 0.9502170028082715, "eval_loss": 0.23053081333637238, "eval_precision": 0.939424533064109, "eval_recall": 0.9612603305785123, "eval_runtime": 3.2663, "eval_samples_per_second": 28.473, "eval_steps_per_second": 0.918, "step": 720 }, { "epoch": 5.83, "eval_accuracy": 0.9524486480213531, "eval_f1": 0.954763709881223, "eval_loss": 0.2539260983467102, "eval_precision": 0.9346857991093518, "eval_recall": 0.9757231404958677, "eval_runtime": 3.298, "eval_samples_per_second": 28.199, "eval_steps_per_second": 0.91, "step": 740 }, { "epoch": 5.98, "eval_accuracy": 0.9543634675641175, "eval_f1": 0.9617654606107262, "eval_loss": 0.28202250599861145, "eval_precision": 0.9556348801631821, "eval_recall": 0.9679752066115702, "eval_runtime": 3.2935, "eval_samples_per_second": 28.237, "eval_steps_per_second": 0.911, "step": 760 }, { "epoch": 6.14, "eval_accuracy": 0.9599048392712081, "eval_f1": 0.9617065021845285, "eval_loss": 0.23321200907230377, "eval_precision": 0.9570332480818414, "eval_recall": 0.9664256198347108, "eval_runtime": 3.2967, "eval_samples_per_second": 28.21, "eval_steps_per_second": 0.91, "step": 780 }, { "epoch": 6.3, "eval_accuracy": 0.9590054543344552, "eval_f1": 0.961734693877551, "eval_loss": 0.25181254744529724, "eval_precision": 0.9501008064516129, "eval_recall": 0.9736570247933884, "eval_runtime": 2.9863, "eval_samples_per_second": 31.142, "eval_steps_per_second": 1.005, "step": 800 }, { "epoch": 6.46, "eval_accuracy": 0.95752582105141, "eval_f1": 0.9602258147292788, "eval_loss": 0.2890544533729553, "eval_precision": 0.954105048444671, "eval_recall": 0.9664256198347108, "eval_runtime": 2.2411, "eval_samples_per_second": 41.497, "eval_steps_per_second": 1.339, "step": 820 }, { "epoch": 6.61, "eval_accuracy": 0.9568875478704886, "eval_f1": 0.9569451563300871, "eval_loss": 0.29066193103790283, "eval_precision": 0.9496439471007121, "eval_recall": 0.9643595041322314, "eval_runtime": 2.2281, "eval_samples_per_second": 41.74, "eval_steps_per_second": 1.346, "step": 840 }, { "epoch": 6.77, "eval_accuracy": 0.9598178020192643, "eval_f1": 0.9613118114271074, "eval_loss": 0.26129651069641113, "eval_precision": 0.9537366548042705, "eval_recall": 0.96900826446281, "eval_runtime": 2.2516, "eval_samples_per_second": 41.305, "eval_steps_per_second": 1.332, "step": 860 }, { "epoch": 6.93, "eval_accuracy": 0.9592085412556575, "eval_f1": 0.96229802513465, "eval_loss": 0.25751829147338867, "eval_precision": 0.9556800815078961, "eval_recall": 0.96900826446281, "eval_runtime": 2.2645, "eval_samples_per_second": 41.068, "eval_steps_per_second": 1.325, "step": 880 }, { "epoch": 7.09, "eval_accuracy": 0.9583381687362191, "eval_f1": 0.9625832906201948, "eval_loss": 0.26336678862571716, "eval_precision": 0.9552390640895219, "eval_recall": 0.9700413223140496, "eval_runtime": 2.2487, "eval_samples_per_second": 41.357, "eval_steps_per_second": 1.334, "step": 900 }, { "epoch": 7.24, "eval_accuracy": 0.9581640942323314, "eval_f1": 0.9588550983899822, "eval_loss": 0.2639918923377991, "eval_precision": 0.9489124936772888, "eval_recall": 0.96900826446281, "eval_runtime": 2.996, "eval_samples_per_second": 31.041, "eval_steps_per_second": 1.001, "step": 920 }, { "epoch": 7.4, "eval_accuracy": 0.9608622490425902, "eval_f1": 0.9594284256187804, "eval_loss": 0.2650795876979828, "eval_precision": 0.9480584972264247, "eval_recall": 0.9710743801652892, "eval_runtime": 3.2719, "eval_samples_per_second": 28.424, "eval_steps_per_second": 0.917, "step": 940 }, { "epoch": 7.56, "eval_accuracy": 0.9626900313334107, "eval_f1": 0.9609992352791231, "eval_loss": 0.26358404755592346, "eval_precision": 0.9486663311524912, "eval_recall": 0.9736570247933884, "eval_runtime": 3.271, "eval_samples_per_second": 28.432, "eval_steps_per_second": 0.917, "step": 960 }, { "epoch": 7.72, "eval_accuracy": 0.9627190437507253, "eval_f1": 0.9622256253190403, "eval_loss": 0.2597278356552124, "eval_precision": 0.9510595358224017, "eval_recall": 0.9736570247933884, "eval_runtime": 2.9416, "eval_samples_per_second": 31.616, "eval_steps_per_second": 1.02, "step": 980 }, { "epoch": 7.87, "learning_rate": 0.0, "loss": 0.0555, "step": 1000 }, { "epoch": 7.87, "eval_accuracy": 0.9627190437507253, "eval_f1": 0.961489415965315, "eval_loss": 0.25795629620552063, "eval_precision": 0.9496221662468514, "eval_recall": 0.9736570247933884, "eval_runtime": 3.3, "eval_samples_per_second": 28.182, "eval_steps_per_second": 0.909, "step": 1000 }, { "epoch": 7.87, "step": 1000, "total_flos": 2101461798354944.0, "train_loss": 0.16514424514770507, "train_runtime": 1048.0167, "train_samples_per_second": 7.633, "train_steps_per_second": 0.954 } ], "max_steps": 1000, "num_train_epochs": 8, "total_flos": 2101461798354944.0, "trial_name": null, "trial_params": null }