{ "best_global_step": 1024, "best_metric": 0.9530514004567643, "best_model_checkpoint": "checkpoints/checkpoint-1024", "epoch": 0.37730287398673545, "eval_steps": 128, "global_step": 1024, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0036845983787767134, "grad_norm": 8.914146423339844, "learning_rate": 1.3499999999999998e-05, "loss": 0.6841, "step": 10 }, { "epoch": 0.007369196757553427, "grad_norm": 6.666998386383057, "learning_rate": 2.8499999999999998e-05, "loss": 0.517, "step": 20 }, { "epoch": 0.01105379513633014, "grad_norm": 5.241199016571045, "learning_rate": 4.3499999999999993e-05, "loss": 0.4514, "step": 30 }, { "epoch": 0.014738393515106854, "grad_norm": 5.938083171844482, "learning_rate": 5.85e-05, "loss": 0.4963, "step": 40 }, { "epoch": 0.018422991893883568, "grad_norm": 3.599522590637207, "learning_rate": 7.35e-05, "loss": 0.4226, "step": 50 }, { "epoch": 0.02210759027266028, "grad_norm": 4.397755146026611, "learning_rate": 8.849999999999998e-05, "loss": 0.4117, "step": 60 }, { "epoch": 0.025792188651436992, "grad_norm": 11.364407539367676, "learning_rate": 0.00010349999999999998, "loss": 0.3636, "step": 70 }, { "epoch": 0.029476787030213707, "grad_norm": 3.12703800201416, "learning_rate": 0.0001185, "loss": 0.3726, "step": 80 }, { "epoch": 0.03316138540899042, "grad_norm": 2.9937469959259033, "learning_rate": 0.0001335, "loss": 0.3421, "step": 90 }, { "epoch": 0.036845983787767135, "grad_norm": 2.8763785362243652, "learning_rate": 0.00014849999999999998, "loss": 0.4045, "step": 100 }, { "epoch": 0.040530582166543844, "grad_norm": 4.576082706451416, "learning_rate": 0.0001635, "loss": 0.3629, "step": 110 }, { "epoch": 0.04421518054532056, "grad_norm": 4.948099613189697, "learning_rate": 0.00017849999999999997, "loss": 0.394, "step": 120 }, { "epoch": 0.04716285924834193, "eval_has_safety_issue_roc_auc": 0.918227021974626, "eval_loss": 0.40556982159614563, "eval_mean_roc_auc": 0.918227021974626, "eval_runtime": 489.5112, "eval_samples_per_second": 76.035, "eval_steps_per_second": 2.378, "step": 128 }, { "epoch": 0.047899778924097275, "grad_norm": 2.6524598598480225, "learning_rate": 0.0001935, "loss": 0.3734, "step": 130 }, { "epoch": 0.051584377302873984, "grad_norm": 4.847095966339111, "learning_rate": 0.00020849999999999997, "loss": 0.3707, "step": 140 }, { "epoch": 0.0552689756816507, "grad_norm": 2.1986277103424072, "learning_rate": 0.00022349999999999998, "loss": 0.3914, "step": 150 }, { "epoch": 0.058953574060427415, "grad_norm": 1.9227941036224365, "learning_rate": 0.0002385, "loss": 0.3834, "step": 160 }, { "epoch": 0.06263817243920412, "grad_norm": 1.3873552083969116, "learning_rate": 0.0002535, "loss": 0.2576, "step": 170 }, { "epoch": 0.06632277081798084, "grad_norm": 4.532320022583008, "learning_rate": 0.00026849999999999997, "loss": 0.4172, "step": 180 }, { "epoch": 0.07000736919675755, "grad_norm": 2.5069925785064697, "learning_rate": 0.00028349999999999995, "loss": 0.4712, "step": 190 }, { "epoch": 0.07369196757553427, "grad_norm": 1.9502817392349243, "learning_rate": 0.0002985, "loss": 0.3573, "step": 200 }, { "epoch": 0.07737656595431099, "grad_norm": 6.129453182220459, "learning_rate": 0.00029994004614753843, "loss": 0.2982, "step": 210 }, { "epoch": 0.08106116433308769, "grad_norm": 4.473495006561279, "learning_rate": 0.00029973285979173177, "loss": 0.4217, "step": 220 }, { "epoch": 0.0847457627118644, "grad_norm": 1.3703476190567017, "learning_rate": 0.0002993779051807778, "loss": 0.3758, "step": 230 }, { "epoch": 0.08843036109064112, "grad_norm": 1.4541234970092773, "learning_rate": 0.00029887553261202354, "loss": 0.3863, "step": 240 }, { "epoch": 0.09211495946941783, "grad_norm": 1.872299075126648, "learning_rate": 0.00029822623786654207, "loss": 0.3525, "step": 250 }, { "epoch": 0.09432571849668386, "eval_has_safety_issue_roc_auc": 0.9362037324386603, "eval_loss": 0.33471250534057617, "eval_mean_roc_auc": 0.9362037324386603, "eval_runtime": 487.3367, "eval_samples_per_second": 76.374, "eval_steps_per_second": 2.388, "step": 256 }, { "epoch": 0.09579955784819455, "grad_norm": 2.644226312637329, "learning_rate": 0.0002974306617198568, "loss": 0.3905, "step": 260 }, { "epoch": 0.09948415622697127, "grad_norm": 2.3409223556518555, "learning_rate": 0.0002964895893095737, "loss": 0.3515, "step": 270 }, { "epoch": 0.10316875460574797, "grad_norm": 2.0618879795074463, "learning_rate": 0.00029540394936054435, "loss": 0.3592, "step": 280 }, { "epoch": 0.10685335298452468, "grad_norm": 0.9815084338188171, "learning_rate": 0.00029417481326832776, "loss": 0.3944, "step": 290 }, { "epoch": 0.1105379513633014, "grad_norm": 4.9908013343811035, "learning_rate": 0.00029280339404185146, "loss": 0.4092, "step": 300 }, { "epoch": 0.11422254974207811, "grad_norm": 0.6844871640205383, "learning_rate": 0.00029129104510631853, "loss": 0.4466, "step": 310 }, { "epoch": 0.11790714812085483, "grad_norm": 1.8045854568481445, "learning_rate": 0.00028963925896754035, "loss": 0.3698, "step": 320 }, { "epoch": 0.12159174649963155, "grad_norm": 4.337238788604736, "learning_rate": 0.00028784966573901314, "loss": 0.3892, "step": 330 }, { "epoch": 0.12527634487840825, "grad_norm": 0.8228144645690918, "learning_rate": 0.0002859240315331935, "loss": 0.417, "step": 340 }, { "epoch": 0.12896094325718496, "grad_norm": 1.259918212890625, "learning_rate": 0.00028386425671855764, "loss": 0.3393, "step": 350 }, { "epoch": 0.13264554163596168, "grad_norm": 4.035144805908203, "learning_rate": 0.00028167237404416826, "loss": 0.2893, "step": 360 }, { "epoch": 0.1363301400147384, "grad_norm": 1.1749995946884155, "learning_rate": 0.0002793505466335956, "loss": 0.4133, "step": 370 }, { "epoch": 0.1400147383935151, "grad_norm": 1.3367811441421509, "learning_rate": 0.0002769010658501763, "loss": 0.3775, "step": 380 }, { "epoch": 0.1414885777450258, "eval_has_safety_issue_roc_auc": 0.9363893661137548, "eval_loss": 0.3488316535949707, "eval_mean_roc_auc": 0.9363893661137548, "eval_runtime": 487.0696, "eval_samples_per_second": 76.416, "eval_steps_per_second": 2.39, "step": 384 }, { "epoch": 0.14369933677229182, "grad_norm": 1.7076958417892456, "learning_rate": 0.00027432634903571426, "loss": 0.4061, "step": 390 }, { "epoch": 0.14738393515106854, "grad_norm": 1.1982675790786743, "learning_rate": 0.000271628937124856, "loss": 0.3591, "step": 400 }, { "epoch": 0.15106853352984526, "grad_norm": 0.9970278143882751, "learning_rate": 0.000268811492137495, "loss": 0.3556, "step": 410 }, { "epoch": 0.15475313190862197, "grad_norm": 3.2783939838409424, "learning_rate": 0.0002658767945516796, "loss": 0.3119, "step": 420 }, { "epoch": 0.1584377302873987, "grad_norm": 0.9100527763366699, "learning_rate": 0.0002628277405596167, "loss": 0.3235, "step": 430 }, { "epoch": 0.16212232866617537, "grad_norm": 2.0034751892089844, "learning_rate": 0.0002596673392094796, "loss": 0.3409, "step": 440 }, { "epoch": 0.1658069270449521, "grad_norm": 2.4519588947296143, "learning_rate": 0.00025639870943584104, "loss": 0.3431, "step": 450 }, { "epoch": 0.1694915254237288, "grad_norm": 3.1098690032958984, "learning_rate": 0.0002530250769816612, "loss": 0.3966, "step": 460 }, { "epoch": 0.17317612380250552, "grad_norm": 1.0801705121994019, "learning_rate": 0.0002495497712148688, "loss": 0.3026, "step": 470 }, { "epoch": 0.17686072218128224, "grad_norm": 2.5059814453125, "learning_rate": 0.00024597622184267673, "loss": 0.3053, "step": 480 }, { "epoch": 0.18054532056005895, "grad_norm": 4.749985694885254, "learning_rate": 0.00024230795552687568, "loss": 0.3585, "step": 490 }, { "epoch": 0.18422991893883567, "grad_norm": 2.033231735229492, "learning_rate": 0.00023854859240344416, "loss": 0.3503, "step": 500 }, { "epoch": 0.18791451731761238, "grad_norm": 1.2591739892959595, "learning_rate": 0.00023470184250991156, "loss": 0.3347, "step": 510 }, { "epoch": 0.18865143699336773, "eval_has_safety_issue_roc_auc": 0.941105104803345, "eval_loss": 0.3277105987071991, "eval_mean_roc_auc": 0.941105104803345, "eval_runtime": 487.352, "eval_samples_per_second": 76.372, "eval_steps_per_second": 2.388, "step": 512 }, { "epoch": 0.1915991156963891, "grad_norm": 3.9085240364074707, "learning_rate": 0.00023077150212399899, "loss": 0.3273, "step": 520 }, { "epoch": 0.19528371407516582, "grad_norm": 0.7869892120361328, "learning_rate": 0.00022676145001715174, "loss": 0.3515, "step": 530 }, { "epoch": 0.19896831245394253, "grad_norm": 0.9839105606079102, "learning_rate": 0.00022267564362665968, "loss": 0.3073, "step": 540 }, { "epoch": 0.20265291083271925, "grad_norm": 0.618803083896637, "learning_rate": 0.0002185181151501449, "loss": 0.3734, "step": 550 }, { "epoch": 0.20633750921149593, "grad_norm": 1.2680476903915405, "learning_rate": 0.00021429296756626925, "loss": 0.3323, "step": 560 }, { "epoch": 0.21002210759027265, "grad_norm": 1.8254988193511963, "learning_rate": 0.00021000437058558968, "loss": 0.35, "step": 570 }, { "epoch": 0.21370670596904937, "grad_norm": 1.5576568841934204, "learning_rate": 0.00020565655653555763, "loss": 0.2629, "step": 580 }, { "epoch": 0.21739130434782608, "grad_norm": 0.9699956178665161, "learning_rate": 0.0002012538161837225, "loss": 0.2764, "step": 590 }, { "epoch": 0.2210759027266028, "grad_norm": 0.5201606154441833, "learning_rate": 0.00019680049450326222, "loss": 0.267, "step": 600 }, { "epoch": 0.2247605011053795, "grad_norm": 0.876549243927002, "learning_rate": 0.00019230098638501938, "loss": 0.3421, "step": 610 }, { "epoch": 0.22844509948415623, "grad_norm": 0.6652014255523682, "learning_rate": 0.00018775973230027458, "loss": 0.3823, "step": 620 }, { "epoch": 0.23212969786293294, "grad_norm": 1.2416579723358154, "learning_rate": 0.00018318121391853708, "loss": 0.2984, "step": 630 }, { "epoch": 0.23581429624170966, "grad_norm": 0.8337903618812561, "learning_rate": 0.00017856994968467845, "loss": 0.2969, "step": 640 }, { "epoch": 0.23581429624170966, "eval_has_safety_issue_roc_auc": 0.9458881980708542, "eval_loss": 0.31154128909111023, "eval_mean_roc_auc": 0.9458881980708542, "eval_runtime": 487.6344, "eval_samples_per_second": 76.328, "eval_steps_per_second": 2.387, "step": 640 }, { "epoch": 0.23949889462048637, "grad_norm": 0.8940573930740356, "learning_rate": 0.00017393049035977292, "loss": 0.3488, "step": 650 }, { "epoch": 0.2431834929992631, "grad_norm": 1.5384459495544434, "learning_rate": 0.00016926741453004545, "loss": 0.3101, "step": 660 }, { "epoch": 0.2468680913780398, "grad_norm": 1.3179084062576294, "learning_rate": 0.00016458532408835993, "loss": 0.2165, "step": 670 }, { "epoch": 0.2505526897568165, "grad_norm": 1.6649147272109985, "learning_rate": 0.00015988883969270665, "loss": 0.2676, "step": 680 }, { "epoch": 0.2542372881355932, "grad_norm": 1.43660569190979, "learning_rate": 0.00015518259620617085, "loss": 0.3425, "step": 690 }, { "epoch": 0.2579218865143699, "grad_norm": 1.0876473188400269, "learning_rate": 0.00015047123812288193, "loss": 0.3139, "step": 700 }, { "epoch": 0.26160648489314664, "grad_norm": 1.0270347595214844, "learning_rate": 0.000145759414984459, "loss": 0.3059, "step": 710 }, { "epoch": 0.26529108327192336, "grad_norm": 0.6378604173660278, "learning_rate": 0.00014105177679147446, "loss": 0.2809, "step": 720 }, { "epoch": 0.26897568165070007, "grad_norm": 0.8198941349983215, "learning_rate": 0.00013635296941446449, "loss": 0.3358, "step": 730 }, { "epoch": 0.2726602800294768, "grad_norm": 0.7575820684432983, "learning_rate": 0.00013166763000901655, "loss": 0.3009, "step": 740 }, { "epoch": 0.2763448784082535, "grad_norm": 1.4956635236740112, "learning_rate": 0.00012700038243945594, "loss": 0.3287, "step": 750 }, { "epoch": 0.2800294767870302, "grad_norm": 0.945459246635437, "learning_rate": 0.00012235583271565003, "loss": 0.3243, "step": 760 }, { "epoch": 0.2829771554900516, "eval_has_safety_issue_roc_auc": 0.9507512976389587, "eval_loss": 0.2935161590576172, "eval_mean_roc_auc": 0.9507512976389587, "eval_runtime": 487.4239, "eval_samples_per_second": 76.361, "eval_steps_per_second": 2.388, "step": 768 }, { "epoch": 0.28371407516580693, "grad_norm": 1.6803070306777954, "learning_rate": 0.00011773856444743296, "loss": 0.2998, "step": 770 }, { "epoch": 0.28739867354458365, "grad_norm": 0.8402862548828125, "learning_rate": 0.00011315313432113607, "loss": 0.2805, "step": 780 }, { "epoch": 0.29108327192336036, "grad_norm": 0.950634241104126, "learning_rate": 0.00010860406760268816, "loss": 0.3111, "step": 790 }, { "epoch": 0.2947678703021371, "grad_norm": 0.8810547590255737, "learning_rate": 0.00010409585367172489, "loss": 0.2858, "step": 800 }, { "epoch": 0.2984524686809138, "grad_norm": 0.8042846322059631, "learning_rate": 9.96329415911129e-05, "loss": 0.3156, "step": 810 }, { "epoch": 0.3021370670596905, "grad_norm": 0.6347734332084656, "learning_rate": 9.521973571626184e-05, "loss": 0.347, "step": 820 }, { "epoch": 0.3058216654384672, "grad_norm": 1.017738938331604, "learning_rate": 9.086059134855733e-05, "loss": 0.2554, "step": 830 }, { "epoch": 0.30950626381724394, "grad_norm": 0.5769331455230713, "learning_rate": 8.655981043720452e-05, "loss": 0.2573, "step": 840 }, { "epoch": 0.31319086219602066, "grad_norm": 1.6331214904785156, "learning_rate": 8.232163733372322e-05, "loss": 0.3412, "step": 850 }, { "epoch": 0.3168754605747974, "grad_norm": 1.2017263174057007, "learning_rate": 7.815025460328584e-05, "loss": 0.245, "step": 860 }, { "epoch": 0.32056005895357403, "grad_norm": 0.7730036377906799, "learning_rate": 7.404977889703008e-05, "loss": 0.3052, "step": 870 }, { "epoch": 0.32424465733235075, "grad_norm": 0.643957793712616, "learning_rate": 7.00242568894217e-05, "loss": 0.2292, "step": 880 }, { "epoch": 0.32792925571112747, "grad_norm": 1.9052222967147827, "learning_rate": 6.607766128467497e-05, "loss": 0.3792, "step": 890 }, { "epoch": 0.3301400147383935, "eval_has_safety_issue_roc_auc": 0.9519071421594945, "eval_loss": 0.29572874307632446, "eval_mean_roc_auc": 0.9519071421594945, "eval_runtime": 487.4395, "eval_samples_per_second": 76.358, "eval_steps_per_second": 2.388, "step": 896 }, { "epoch": 0.3316138540899042, "grad_norm": 0.7808663249015808, "learning_rate": 6.221388689617348e-05, "loss": 0.2955, "step": 900 }, { "epoch": 0.3352984524686809, "grad_norm": 0.7185715436935425, "learning_rate": 5.843674680275963e-05, "loss": 0.2523, "step": 910 }, { "epoch": 0.3389830508474576, "grad_norm": 1.606952428817749, "learning_rate": 5.474996858568593e-05, "loss": 0.3108, "step": 920 }, { "epoch": 0.3426676492262343, "grad_norm": 0.7948682308197021, "learning_rate": 5.115719064994245e-05, "loss": 0.2796, "step": 930 }, { "epoch": 0.34635224760501104, "grad_norm": 0.7030922770500183, "learning_rate": 4.766195863359054e-05, "loss": 0.2397, "step": 940 }, { "epoch": 0.35003684598378776, "grad_norm": 1.6789747476577759, "learning_rate": 4.426772190864578e-05, "loss": 0.2918, "step": 950 }, { "epoch": 0.3537214443625645, "grad_norm": 1.2274649143218994, "learning_rate": 4.0977830176964584e-05, "loss": 0.2523, "step": 960 }, { "epoch": 0.3574060427413412, "grad_norm": 0.8976671695709229, "learning_rate": 3.77955301644926e-05, "loss": 0.2475, "step": 970 }, { "epoch": 0.3610906411201179, "grad_norm": 1.8330504894256592, "learning_rate": 3.472396241713854e-05, "loss": 0.319, "step": 980 }, { "epoch": 0.3647752394988946, "grad_norm": 0.5846236944198608, "learning_rate": 3.1766158201434e-05, "loss": 0.2242, "step": 990 }, { "epoch": 0.36845983787767134, "grad_norm": 1.2076009511947632, "learning_rate": 2.8925036513039986e-05, "loss": 0.306, "step": 1000 }, { "epoch": 0.37214443625644805, "grad_norm": 0.7948538064956665, "learning_rate": 2.620340119605006e-05, "loss": 0.2373, "step": 1010 }, { "epoch": 0.37582903463522477, "grad_norm": 2.274285316467285, "learning_rate": 2.360393817593514e-05, "loss": 0.2993, "step": 1020 }, { "epoch": 0.37730287398673545, "eval_has_safety_issue_roc_auc": 0.9530514004567643, "eval_loss": 0.2790428400039673, "eval_mean_roc_auc": 0.9530514004567643, "eval_runtime": 487.3953, "eval_samples_per_second": 76.365, "eval_steps_per_second": 2.388, "step": 1024 } ], "logging_steps": 10, "max_steps": 3200, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 128, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.466350275271066e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }