File size: 16,243 Bytes
{
  "best_global_step": 1100,
  "best_metric": 0.8458904558156242,
  "best_model_checkpoint": "modernbert-ai-detector\\checkpoint-1000",
  "epoch": 0.6327445293962563,
  "eval_steps": 100,
  "global_step": 1200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.026364355391510677,
      "grad_norm": 58.5,
      "learning_rate": 2.5843881856540085e-06,
      "loss": 2.6514,
      "step": 50
    },
    {
      "epoch": 0.052728710783021354,
      "grad_norm": 38.0,
      "learning_rate": 5.221518987341772e-06,
      "loss": 2.4826,
      "step": 100
    },
    {
      "epoch": 0.052728710783021354,
      "eval_accuracy": 0.3540233562629979,
      "eval_f1_ai": 0.1182551168079388,
      "eval_f1_human": 0.3140407288317256,
      "eval_f1_macro": 0.2998623455656316,
      "eval_f1_micro": 0.3540233562629979,
      "eval_f1_mixed": 0.46729119105723055,
      "eval_f1_weighted": 0.35779828876973646,
      "eval_loss": 1.1378378868103027,
      "eval_precision_ai": 0.12716763005780346,
      "eval_precision_human": 0.26479891549932216,
      "eval_precision_mixed": 0.5093530118414278,
      "eval_precision_weighted": 0.37081108952953074,
      "eval_recall_ai": 0.11051004636785162,
      "eval_recall_human": 0.3857801184990125,
      "eval_recall_mixed": 0.4316463059918557,
      "eval_recall_weighted": 0.3540233562629979,
      "eval_runtime": 201.6513,
      "eval_samples_per_second": 61.998,
      "eval_steps_per_second": 1.939,
      "step": 100
    },
    {
      "epoch": 0.07909306617453203,
      "grad_norm": 30.625,
      "learning_rate": 7.858649789029536e-06,
      "loss": 2.1667,
      "step": 150
    },
    {
      "epoch": 0.10545742156604271,
      "grad_norm": 30.25,
      "learning_rate": 1.04957805907173e-05,
      "loss": 1.9264,
      "step": 200
    },
    {
      "epoch": 0.10545742156604271,
      "eval_accuracy": 0.560550311950088,
      "eval_f1_ai": 0.2870420995079278,
      "eval_f1_human": 0.08250526949713942,
      "eval_f1_macro": 0.35789350580335344,
      "eval_f1_micro": 0.560550311950088,
      "eval_f1_mixed": 0.7041331484049931,
      "eval_f1_weighted": 0.46673616146948954,
      "eval_loss": 0.9078124165534973,
      "eval_precision_ai": 0.49065420560747663,
      "eval_precision_human": 0.4840989399293286,
      "eval_precision_mixed": 0.5691990313032559,
      "eval_precision_weighted": 0.5322602945815579,
      "eval_recall_ai": 0.20285935085007728,
      "eval_recall_human": 0.04509545753785385,
      "eval_recall_mixed": 0.9229203025014543,
      "eval_recall_weighted": 0.560550311950088,
      "eval_runtime": 211.1463,
      "eval_samples_per_second": 59.21,
      "eval_steps_per_second": 1.852,
      "step": 200
    },
    {
      "epoch": 0.1318217769575534,
      "grad_norm": 24.875,
      "learning_rate": 1.3132911392405065e-05,
      "loss": 1.768,
      "step": 250
    },
    {
      "epoch": 0.15818613234906406,
      "grad_norm": 49.75,
      "learning_rate": 1.5770042194092827e-05,
      "loss": 1.6275,
      "step": 300
    },
    {
      "epoch": 0.15818613234906406,
      "eval_accuracy": 0.6739721644536875,
      "eval_f1_ai": 0.75,
      "eval_f1_human": 0.20677177565262342,
      "eval_f1_macro": 0.5733127667382775,
      "eval_f1_micro": 0.6739721644536875,
      "eval_f1_mixed": 0.763166524562209,
      "eval_f1_weighted": 0.625236416359176,
      "eval_loss": 0.7546337842941284,
      "eval_precision_ai": 0.6690909090909091,
      "eval_precision_human": 0.4813477737665463,
      "eval_precision_mixed": 0.6950185163062955,
      "eval_precision_weighted": 0.6377290935492025,
      "eval_recall_ai": 0.8531684698608965,
      "eval_recall_human": 0.1316655694535879,
      "eval_recall_mixed": 0.8461314717859221,
      "eval_recall_weighted": 0.6739721644536875,
      "eval_runtime": 215.6745,
      "eval_samples_per_second": 57.967,
      "eval_steps_per_second": 1.813,
      "step": 300
    },
    {
      "epoch": 0.18455048774057475,
      "grad_norm": 14.25,
      "learning_rate": 1.8407172995780592e-05,
      "loss": 1.5471,
      "step": 350
    },
    {
      "epoch": 0.21091484313208542,
      "grad_norm": 21.875,
      "learning_rate": 2.1044303797468356e-05,
      "loss": 1.4071,
      "step": 400
    },
    {
      "epoch": 0.21091484313208542,
      "eval_accuracy": 0.6927691569348904,
      "eval_f1_ai": 0.7882645141260478,
      "eval_f1_human": 0.3710217755443886,
      "eval_f1_macro": 0.639632496479439,
      "eval_f1_micro": 0.6927691569348904,
      "eval_f1_mixed": 0.7596111997678805,
      "eval_f1_weighted": 0.6711149677064479,
      "eval_loss": 0.6714780926704407,
      "eval_precision_ai": 0.6587960560456668,
      "eval_precision_human": 0.5097813578826237,
      "eval_precision_mixed": 0.7577424023154848,
      "eval_precision_weighted": 0.6770050165265453,
      "eval_recall_ai": 0.9810664605873262,
      "eval_recall_human": 0.2916392363396972,
      "eval_recall_mixed": 0.7614892379290285,
      "eval_recall_weighted": 0.6927691569348904,
      "eval_runtime": 198.7063,
      "eval_samples_per_second": 62.917,
      "eval_steps_per_second": 1.968,
      "step": 400
    },
    {
      "epoch": 0.2372791985235961,
      "grad_norm": 77.0,
      "learning_rate": 2.3681434599156117e-05,
      "loss": 1.3628,
      "step": 450
    },
    {
      "epoch": 0.2636435539151068,
      "grad_norm": 30.5,
      "learning_rate": 2.6318565400843882e-05,
      "loss": 1.2541,
      "step": 500
    },
    {
      "epoch": 0.2636435539151068,
      "eval_accuracy": 0.7350023996160614,
      "eval_f1_ai": 0.8567004988818167,
      "eval_f1_human": 0.30513595166163143,
      "eval_f1_macro": 0.654182029936718,
      "eval_f1_micro": 0.7350023996160614,
      "eval_f1_mixed": 0.8007096392667061,
      "eval_f1_weighted": 0.6918751713207526,
      "eval_loss": 0.588551938533783,
      "eval_precision_ai": 0.772093023255814,
      "eval_precision_human": 0.6488222698072805,
      "eval_precision_mixed": 0.7303128371089536,
      "eval_precision_weighted": 0.7191593239339089,
      "eval_recall_ai": 0.9621329211746522,
      "eval_recall_human": 0.19947333772218565,
      "eval_recall_mixed": 0.8861256544502618,
      "eval_recall_weighted": 0.7350023996160614,
      "eval_runtime": 200.2569,
      "eval_samples_per_second": 62.43,
      "eval_steps_per_second": 1.952,
      "step": 500
    },
    {
      "epoch": 0.29000790930661746,
      "grad_norm": 57.25,
      "learning_rate": 2.8955696202531646e-05,
      "loss": 1.2181,
      "step": 550
    },
    {
      "epoch": 0.3163722646981281,
      "grad_norm": 46.5,
      "learning_rate": 3.159282700421941e-05,
      "loss": 1.305,
      "step": 600
    },
    {
      "epoch": 0.3163722646981281,
      "eval_accuracy": 0.7118860982242842,
      "eval_f1_ai": 0.863129435620142,
      "eval_f1_human": 0.563509072724675,
      "eval_f1_macro": 0.7167971002003201,
      "eval_f1_micro": 0.7118860982242842,
      "eval_f1_mixed": 0.723752792256143,
      "eval_f1_weighted": 0.7136653128999944,
      "eval_loss": 0.5719289779663086,
      "eval_precision_ai": 0.766966966966967,
      "eval_precision_human": 0.4978540772532189,
      "eval_precision_mixed": 0.8393782383419689,
      "eval_precision_weighted": 0.7413978534670587,
      "eval_recall_ai": 0.9868624420401855,
      "eval_recall_human": 0.6491112574061882,
      "eval_recall_mixed": 0.6361256544502618,
      "eval_recall_weighted": 0.7118860982242842,
      "eval_runtime": 201.8683,
      "eval_samples_per_second": 61.931,
      "eval_steps_per_second": 1.937,
      "step": 600
    },
    {
      "epoch": 0.3427366200896388,
      "grad_norm": 40.25,
      "learning_rate": 3.422995780590718e-05,
      "loss": 1.1546,
      "step": 650
    },
    {
      "epoch": 0.3691009754811495,
      "grad_norm": 19.375,
      "learning_rate": 3.686708860759494e-05,
      "loss": 1.1359,
      "step": 700
    },
    {
      "epoch": 0.3691009754811495,
      "eval_accuracy": 0.7633978563429851,
      "eval_f1_ai": 0.9003927168868261,
      "eval_f1_human": 0.6369589977220956,
      "eval_f1_macro": 0.7701658728773962,
      "eval_f1_micro": 0.7633978563429851,
      "eval_f1_mixed": 0.7731459040232671,
      "eval_f1_weighted": 0.7663932988679265,
      "eval_loss": 0.5114254355430603,
      "eval_precision_ai": 0.8367617783676178,
      "eval_precision_human": 0.5612142498745609,
      "eval_precision_mixed": 0.8696837513631407,
      "eval_precision_weighted": 0.7879102421938303,
      "eval_recall_ai": 0.9744976816074189,
      "eval_recall_human": 0.7363396971691902,
      "eval_recall_mixed": 0.6958987783595113,
      "eval_recall_weighted": 0.7633978563429851,
      "eval_runtime": 212.8928,
      "eval_samples_per_second": 58.724,
      "eval_steps_per_second": 1.837,
      "step": 700
    },
    {
      "epoch": 0.39546533087266017,
      "grad_norm": 23.125,
      "learning_rate": 3.95042194092827e-05,
      "loss": 1.0872,
      "step": 750
    },
    {
      "epoch": 0.42182968626417083,
      "grad_norm": 157.0,
      "learning_rate": 4.214135021097047e-05,
      "loss": 0.9872,
      "step": 800
    },
    {
      "epoch": 0.42182968626417083,
      "eval_accuracy": 0.8017917133258678,
      "eval_f1_ai": 0.9022680412371133,
      "eval_f1_human": 0.6192686096157416,
      "eval_f1_macro": 0.7856376091542879,
      "eval_f1_micro": 0.8017917133258678,
      "eval_f1_mixed": 0.8353761766100088,
      "eval_f1_weighted": 0.7967088719488636,
      "eval_loss": 0.4510592222213745,
      "eval_precision_ai": 0.9672855879752431,
      "eval_precision_human": 0.7100893997445722,
      "eval_precision_mixed": 0.7816499809910024,
      "eval_precision_weighted": 0.8026885272274894,
      "eval_recall_ai": 0.8454404945904173,
      "eval_recall_human": 0.5490454246214614,
      "eval_recall_mixed": 0.8970331588132635,
      "eval_recall_weighted": 0.8017917133258678,
      "eval_runtime": 206.8504,
      "eval_samples_per_second": 60.44,
      "eval_steps_per_second": 1.89,
      "step": 800
    },
    {
      "epoch": 0.4481940416556815,
      "grad_norm": 46.0,
      "learning_rate": 4.477848101265823e-05,
      "loss": 0.9952,
      "step": 850
    },
    {
      "epoch": 0.4745583970471922,
      "grad_norm": 23.375,
      "learning_rate": 4.7415611814346e-05,
      "loss": 0.8916,
      "step": 900
    },
    {
      "epoch": 0.4745583970471922,
      "eval_accuracy": 0.826427771556551,
      "eval_f1_ai": 0.9497544389875331,
      "eval_f1_human": 0.712417340191036,
      "eval_f1_macro": 0.8327089451194482,
      "eval_f1_micro": 0.826427771556551,
      "eval_f1_mixed": 0.8359550561797753,
      "eval_f1_weighted": 0.8294925079101134,
      "eval_loss": 0.39986124634742737,
      "eval_precision_ai": 0.9290465631929047,
      "eval_precision_human": 0.6434828776214494,
      "eval_precision_mixed": 0.8946757339525626,
      "eval_precision_weighted": 0.8407505866593362,
      "eval_recall_ai": 0.9714064914992272,
      "eval_recall_human": 0.7978933508887426,
      "eval_recall_mixed": 0.7844677137870855,
      "eval_recall_weighted": 0.826427771556551,
      "eval_runtime": 209.7976,
      "eval_samples_per_second": 59.591,
      "eval_steps_per_second": 1.864,
      "step": 900
    },
    {
      "epoch": 0.5009227524387029,
      "grad_norm": 68.5,
      "learning_rate": 4.9994139709329584e-05,
      "loss": 0.899,
      "step": 950
    },
    {
      "epoch": 0.5272871078302136,
      "grad_norm": 24.5,
      "learning_rate": 4.970112517580872e-05,
      "loss": 0.8748,
      "step": 1000
    },
    {
      "epoch": 0.5272871078302136,
      "eval_accuracy": 0.8299472084466485,
      "eval_f1_ai": 0.943791329904482,
      "eval_f1_human": 0.727299016772701,
      "eval_f1_macro": 0.8360557403613388,
      "eval_f1_micro": 0.8299472084466485,
      "eval_f1_mixed": 0.8370768744068333,
      "eval_f1_weighted": 0.8324913584362222,
      "eval_loss": 0.3658604323863983,
      "eval_precision_ai": 0.8995098039215687,
      "eval_precision_human": 0.64853017019082,
      "eval_precision_mixed": 0.9174757281553398,
      "eval_precision_weighted": 0.8484025864969482,
      "eval_recall_ai": 0.9926584234930448,
      "eval_recall_human": 0.8278472679394339,
      "eval_recall_mixed": 0.7696335078534031,
      "eval_recall_weighted": 0.8299472084466485,
      "eval_runtime": 204.603,
      "eval_samples_per_second": 61.104,
      "eval_steps_per_second": 1.911,
      "step": 1000
    },
    {
      "epoch": 0.5536514632217242,
      "grad_norm": 51.75,
      "learning_rate": 4.940811064228786e-05,
      "loss": 0.7453,
      "step": 1050
    },
    {
      "epoch": 0.5800158186132349,
      "grad_norm": 34.5,
      "learning_rate": 4.9115096108766995e-05,
      "loss": 0.8091,
      "step": 1100
    },
    {
      "epoch": 0.5800158186132349,
      "eval_accuracy": 0.8637817949128139,
      "eval_f1_ai": 0.9449456821948076,
      "eval_f1_human": 0.7056443455391462,
      "eval_f1_macro": 0.8458904558156242,
      "eval_f1_micro": 0.8637817949128139,
      "eval_f1_mixed": 0.8870813397129187,
      "eval_f1_weighted": 0.8549702638885073,
      "eval_loss": 0.32950422167778015,
      "eval_precision_ai": 0.9025677101653183,
      "eval_precision_human": 0.9154855643044619,
      "eval_precision_mixed": 0.8368583956667527,
      "eval_precision_weighted": 0.8695671658030227,
      "eval_recall_ai": 0.991499227202473,
      "eval_recall_human": 0.5740618828176431,
      "eval_recall_mixed": 0.943717277486911,
      "eval_recall_weighted": 0.8637817949128139,
      "eval_runtime": 206.2487,
      "eval_samples_per_second": 60.616,
      "eval_steps_per_second": 1.896,
      "step": 1100
    },
    {
      "epoch": 0.6063801740047455,
      "grad_norm": 80.0,
      "learning_rate": 4.882208157524614e-05,
      "loss": 0.7375,
      "step": 1150
    },
    {
      "epoch": 0.6327445293962563,
      "grad_norm": 28.375,
      "learning_rate": 4.8529067041725276e-05,
      "loss": 0.7169,
      "step": 1200
    },
    {
      "epoch": 0.6327445293962563,
      "eval_accuracy": 0.8502639577667573,
      "eval_f1_ai": 0.866811861283297,
      "eval_f1_human": 0.7657316148597423,
      "eval_f1_macro": 0.8360143830281289,
      "eval_f1_micro": 0.8502639577667573,
      "eval_f1_mixed": 0.8754996729413475,
      "eval_f1_weighted": 0.847027475131161,
      "eval_loss": 0.39430809020996094,
      "eval_precision_ai": 0.7651582372079266,
      "eval_precision_human": 0.902591599642538,
      "eval_precision_mixed": 0.8750544820572425,
      "eval_precision_weighted": 0.8589967538180886,
      "eval_recall_ai": 0.999613601236476,
      "eval_recall_human": 0.6649111257406188,
      "eval_recall_mixed": 0.8759453170447935,
      "eval_recall_weighted": 0.8502639577667573,
      "eval_runtime": 214.0168,
      "eval_samples_per_second": 58.416,
      "eval_steps_per_second": 1.827,
      "step": 1200
    }
  ],
  "logging_steps": 50,
  "max_steps": 9480,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 200,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 1
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.987622653354922e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}