{ "best_global_step": 1100, "best_metric": 0.8458904558156242, "best_model_checkpoint": "modernbert-ai-detector\\checkpoint-1000", "epoch": 0.6327445293962563, "eval_steps": 100, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.026364355391510677, "grad_norm": 58.5, "learning_rate": 2.5843881856540085e-06, "loss": 2.6514, "step": 50 }, { "epoch": 0.052728710783021354, "grad_norm": 38.0, "learning_rate": 5.221518987341772e-06, "loss": 2.4826, "step": 100 }, { "epoch": 0.052728710783021354, "eval_accuracy": 0.3540233562629979, "eval_f1_ai": 0.1182551168079388, "eval_f1_human": 0.3140407288317256, "eval_f1_macro": 0.2998623455656316, "eval_f1_micro": 0.3540233562629979, "eval_f1_mixed": 0.46729119105723055, "eval_f1_weighted": 0.35779828876973646, "eval_loss": 1.1378378868103027, "eval_precision_ai": 0.12716763005780346, "eval_precision_human": 0.26479891549932216, "eval_precision_mixed": 0.5093530118414278, "eval_precision_weighted": 0.37081108952953074, "eval_recall_ai": 0.11051004636785162, "eval_recall_human": 0.3857801184990125, "eval_recall_mixed": 0.4316463059918557, "eval_recall_weighted": 0.3540233562629979, "eval_runtime": 201.6513, "eval_samples_per_second": 61.998, "eval_steps_per_second": 1.939, "step": 100 }, { "epoch": 0.07909306617453203, "grad_norm": 30.625, "learning_rate": 7.858649789029536e-06, "loss": 2.1667, "step": 150 }, { "epoch": 0.10545742156604271, "grad_norm": 30.25, "learning_rate": 1.04957805907173e-05, "loss": 1.9264, "step": 200 }, { "epoch": 0.10545742156604271, "eval_accuracy": 0.560550311950088, "eval_f1_ai": 0.2870420995079278, "eval_f1_human": 0.08250526949713942, "eval_f1_macro": 0.35789350580335344, "eval_f1_micro": 0.560550311950088, "eval_f1_mixed": 0.7041331484049931, "eval_f1_weighted": 0.46673616146948954, "eval_loss": 0.9078124165534973, "eval_precision_ai": 0.49065420560747663, "eval_precision_human": 0.4840989399293286, "eval_precision_mixed": 0.5691990313032559, "eval_precision_weighted": 0.5322602945815579, "eval_recall_ai": 0.20285935085007728, "eval_recall_human": 0.04509545753785385, "eval_recall_mixed": 0.9229203025014543, "eval_recall_weighted": 0.560550311950088, "eval_runtime": 211.1463, "eval_samples_per_second": 59.21, "eval_steps_per_second": 1.852, "step": 200 }, { "epoch": 0.1318217769575534, "grad_norm": 24.875, "learning_rate": 1.3132911392405065e-05, "loss": 1.768, "step": 250 }, { "epoch": 0.15818613234906406, "grad_norm": 49.75, "learning_rate": 1.5770042194092827e-05, "loss": 1.6275, "step": 300 }, { "epoch": 0.15818613234906406, "eval_accuracy": 0.6739721644536875, "eval_f1_ai": 0.75, "eval_f1_human": 0.20677177565262342, "eval_f1_macro": 0.5733127667382775, "eval_f1_micro": 0.6739721644536875, "eval_f1_mixed": 0.763166524562209, "eval_f1_weighted": 0.625236416359176, "eval_loss": 0.7546337842941284, "eval_precision_ai": 0.6690909090909091, "eval_precision_human": 0.4813477737665463, "eval_precision_mixed": 0.6950185163062955, "eval_precision_weighted": 0.6377290935492025, "eval_recall_ai": 0.8531684698608965, "eval_recall_human": 0.1316655694535879, "eval_recall_mixed": 0.8461314717859221, "eval_recall_weighted": 0.6739721644536875, "eval_runtime": 215.6745, "eval_samples_per_second": 57.967, "eval_steps_per_second": 1.813, "step": 300 }, { "epoch": 0.18455048774057475, "grad_norm": 14.25, "learning_rate": 1.8407172995780592e-05, "loss": 1.5471, "step": 350 }, { "epoch": 0.21091484313208542, "grad_norm": 21.875, "learning_rate": 2.1044303797468356e-05, "loss": 1.4071, "step": 400 }, { "epoch": 0.21091484313208542, "eval_accuracy": 0.6927691569348904, "eval_f1_ai": 0.7882645141260478, "eval_f1_human": 0.3710217755443886, "eval_f1_macro": 0.639632496479439, "eval_f1_micro": 0.6927691569348904, "eval_f1_mixed": 0.7596111997678805, "eval_f1_weighted": 0.6711149677064479, "eval_loss": 0.6714780926704407, "eval_precision_ai": 0.6587960560456668, "eval_precision_human": 0.5097813578826237, "eval_precision_mixed": 0.7577424023154848, "eval_precision_weighted": 0.6770050165265453, "eval_recall_ai": 0.9810664605873262, "eval_recall_human": 0.2916392363396972, "eval_recall_mixed": 0.7614892379290285, "eval_recall_weighted": 0.6927691569348904, "eval_runtime": 198.7063, "eval_samples_per_second": 62.917, "eval_steps_per_second": 1.968, "step": 400 }, { "epoch": 0.2372791985235961, "grad_norm": 77.0, "learning_rate": 2.3681434599156117e-05, "loss": 1.3628, "step": 450 }, { "epoch": 0.2636435539151068, "grad_norm": 30.5, "learning_rate": 2.6318565400843882e-05, "loss": 1.2541, "step": 500 }, { "epoch": 0.2636435539151068, "eval_accuracy": 0.7350023996160614, "eval_f1_ai": 0.8567004988818167, "eval_f1_human": 0.30513595166163143, "eval_f1_macro": 0.654182029936718, "eval_f1_micro": 0.7350023996160614, "eval_f1_mixed": 0.8007096392667061, "eval_f1_weighted": 0.6918751713207526, "eval_loss": 0.588551938533783, "eval_precision_ai": 0.772093023255814, "eval_precision_human": 0.6488222698072805, "eval_precision_mixed": 0.7303128371089536, "eval_precision_weighted": 0.7191593239339089, "eval_recall_ai": 0.9621329211746522, "eval_recall_human": 0.19947333772218565, "eval_recall_mixed": 0.8861256544502618, "eval_recall_weighted": 0.7350023996160614, "eval_runtime": 200.2569, "eval_samples_per_second": 62.43, "eval_steps_per_second": 1.952, "step": 500 }, { "epoch": 0.29000790930661746, "grad_norm": 57.25, "learning_rate": 2.8955696202531646e-05, "loss": 1.2181, "step": 550 }, { "epoch": 0.3163722646981281, "grad_norm": 46.5, "learning_rate": 3.159282700421941e-05, "loss": 1.305, "step": 600 }, { "epoch": 0.3163722646981281, "eval_accuracy": 0.7118860982242842, "eval_f1_ai": 0.863129435620142, "eval_f1_human": 0.563509072724675, "eval_f1_macro": 0.7167971002003201, "eval_f1_micro": 0.7118860982242842, "eval_f1_mixed": 0.723752792256143, "eval_f1_weighted": 0.7136653128999944, "eval_loss": 0.5719289779663086, "eval_precision_ai": 0.766966966966967, "eval_precision_human": 0.4978540772532189, "eval_precision_mixed": 0.8393782383419689, "eval_precision_weighted": 0.7413978534670587, "eval_recall_ai": 0.9868624420401855, "eval_recall_human": 0.6491112574061882, "eval_recall_mixed": 0.6361256544502618, "eval_recall_weighted": 0.7118860982242842, "eval_runtime": 201.8683, "eval_samples_per_second": 61.931, "eval_steps_per_second": 1.937, "step": 600 }, { "epoch": 0.3427366200896388, "grad_norm": 40.25, "learning_rate": 3.422995780590718e-05, "loss": 1.1546, "step": 650 }, { "epoch": 0.3691009754811495, "grad_norm": 19.375, "learning_rate": 3.686708860759494e-05, "loss": 1.1359, "step": 700 }, { "epoch": 0.3691009754811495, "eval_accuracy": 0.7633978563429851, "eval_f1_ai": 0.9003927168868261, "eval_f1_human": 0.6369589977220956, "eval_f1_macro": 0.7701658728773962, "eval_f1_micro": 0.7633978563429851, "eval_f1_mixed": 0.7731459040232671, "eval_f1_weighted": 0.7663932988679265, "eval_loss": 0.5114254355430603, "eval_precision_ai": 0.8367617783676178, "eval_precision_human": 0.5612142498745609, "eval_precision_mixed": 0.8696837513631407, "eval_precision_weighted": 0.7879102421938303, "eval_recall_ai": 0.9744976816074189, "eval_recall_human": 0.7363396971691902, "eval_recall_mixed": 0.6958987783595113, "eval_recall_weighted": 0.7633978563429851, "eval_runtime": 212.8928, "eval_samples_per_second": 58.724, "eval_steps_per_second": 1.837, "step": 700 }, { "epoch": 0.39546533087266017, "grad_norm": 23.125, "learning_rate": 3.95042194092827e-05, "loss": 1.0872, "step": 750 }, { "epoch": 0.42182968626417083, "grad_norm": 157.0, "learning_rate": 4.214135021097047e-05, "loss": 0.9872, "step": 800 }, { "epoch": 0.42182968626417083, "eval_accuracy": 0.8017917133258678, "eval_f1_ai": 0.9022680412371133, "eval_f1_human": 0.6192686096157416, "eval_f1_macro": 0.7856376091542879, "eval_f1_micro": 0.8017917133258678, "eval_f1_mixed": 0.8353761766100088, "eval_f1_weighted": 0.7967088719488636, "eval_loss": 0.4510592222213745, "eval_precision_ai": 0.9672855879752431, "eval_precision_human": 0.7100893997445722, "eval_precision_mixed": 0.7816499809910024, "eval_precision_weighted": 0.8026885272274894, "eval_recall_ai": 0.8454404945904173, "eval_recall_human": 0.5490454246214614, "eval_recall_mixed": 0.8970331588132635, "eval_recall_weighted": 0.8017917133258678, "eval_runtime": 206.8504, "eval_samples_per_second": 60.44, "eval_steps_per_second": 1.89, "step": 800 }, { "epoch": 0.4481940416556815, "grad_norm": 46.0, "learning_rate": 4.477848101265823e-05, "loss": 0.9952, "step": 850 }, { "epoch": 0.4745583970471922, "grad_norm": 23.375, "learning_rate": 4.7415611814346e-05, "loss": 0.8916, "step": 900 }, { "epoch": 0.4745583970471922, "eval_accuracy": 0.826427771556551, "eval_f1_ai": 0.9497544389875331, "eval_f1_human": 0.712417340191036, "eval_f1_macro": 0.8327089451194482, "eval_f1_micro": 0.826427771556551, "eval_f1_mixed": 0.8359550561797753, "eval_f1_weighted": 0.8294925079101134, "eval_loss": 0.39986124634742737, "eval_precision_ai": 0.9290465631929047, "eval_precision_human": 0.6434828776214494, "eval_precision_mixed": 0.8946757339525626, "eval_precision_weighted": 0.8407505866593362, "eval_recall_ai": 0.9714064914992272, "eval_recall_human": 0.7978933508887426, "eval_recall_mixed": 0.7844677137870855, "eval_recall_weighted": 0.826427771556551, "eval_runtime": 209.7976, "eval_samples_per_second": 59.591, "eval_steps_per_second": 1.864, "step": 900 }, { "epoch": 0.5009227524387029, "grad_norm": 68.5, "learning_rate": 4.9994139709329584e-05, "loss": 0.899, "step": 950 }, { "epoch": 0.5272871078302136, "grad_norm": 24.5, "learning_rate": 4.970112517580872e-05, "loss": 0.8748, "step": 1000 }, { "epoch": 0.5272871078302136, "eval_accuracy": 0.8299472084466485, "eval_f1_ai": 0.943791329904482, "eval_f1_human": 0.727299016772701, "eval_f1_macro": 0.8360557403613388, "eval_f1_micro": 0.8299472084466485, "eval_f1_mixed": 0.8370768744068333, "eval_f1_weighted": 0.8324913584362222, "eval_loss": 0.3658604323863983, "eval_precision_ai": 0.8995098039215687, "eval_precision_human": 0.64853017019082, "eval_precision_mixed": 0.9174757281553398, "eval_precision_weighted": 0.8484025864969482, "eval_recall_ai": 0.9926584234930448, "eval_recall_human": 0.8278472679394339, "eval_recall_mixed": 0.7696335078534031, "eval_recall_weighted": 0.8299472084466485, "eval_runtime": 204.603, "eval_samples_per_second": 61.104, "eval_steps_per_second": 1.911, "step": 1000 }, { "epoch": 0.5536514632217242, "grad_norm": 51.75, "learning_rate": 4.940811064228786e-05, "loss": 0.7453, "step": 1050 }, { "epoch": 0.5800158186132349, "grad_norm": 34.5, "learning_rate": 4.9115096108766995e-05, "loss": 0.8091, "step": 1100 }, { "epoch": 0.5800158186132349, "eval_accuracy": 0.8637817949128139, "eval_f1_ai": 0.9449456821948076, "eval_f1_human": 0.7056443455391462, "eval_f1_macro": 0.8458904558156242, "eval_f1_micro": 0.8637817949128139, "eval_f1_mixed": 0.8870813397129187, "eval_f1_weighted": 0.8549702638885073, "eval_loss": 0.32950422167778015, "eval_precision_ai": 0.9025677101653183, "eval_precision_human": 0.9154855643044619, "eval_precision_mixed": 0.8368583956667527, "eval_precision_weighted": 0.8695671658030227, "eval_recall_ai": 0.991499227202473, "eval_recall_human": 0.5740618828176431, "eval_recall_mixed": 0.943717277486911, "eval_recall_weighted": 0.8637817949128139, "eval_runtime": 206.2487, "eval_samples_per_second": 60.616, "eval_steps_per_second": 1.896, "step": 1100 }, { "epoch": 0.6063801740047455, "grad_norm": 80.0, "learning_rate": 4.882208157524614e-05, "loss": 0.7375, "step": 1150 }, { "epoch": 0.6327445293962563, "grad_norm": 28.375, "learning_rate": 4.8529067041725276e-05, "loss": 0.7169, "step": 1200 }, { "epoch": 0.6327445293962563, "eval_accuracy": 0.8502639577667573, "eval_f1_ai": 0.866811861283297, "eval_f1_human": 0.7657316148597423, "eval_f1_macro": 0.8360143830281289, "eval_f1_micro": 0.8502639577667573, "eval_f1_mixed": 0.8754996729413475, "eval_f1_weighted": 0.847027475131161, "eval_loss": 0.39430809020996094, "eval_precision_ai": 0.7651582372079266, "eval_precision_human": 0.902591599642538, "eval_precision_mixed": 0.8750544820572425, "eval_precision_weighted": 0.8589967538180886, "eval_recall_ai": 0.999613601236476, "eval_recall_human": 0.6649111257406188, "eval_recall_mixed": 0.8759453170447935, "eval_recall_weighted": 0.8502639577667573, "eval_runtime": 214.0168, "eval_samples_per_second": 58.416, "eval_steps_per_second": 1.827, "step": 1200 } ], "logging_steps": 50, "max_steps": 9480, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.987622653354922e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }