{
  "best_global_step": 1024,
  "best_metric": 0.9530514004567643,
  "best_model_checkpoint": "checkpoints/checkpoint-1024",
  "epoch": 0.37730287398673545,
  "eval_steps": 128,
  "global_step": 1024,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0036845983787767134,
      "grad_norm": 8.914146423339844,
      "learning_rate": 1.3499999999999998e-05,
      "loss": 0.6841,
      "step": 10
    },
    {
      "epoch": 0.007369196757553427,
      "grad_norm": 6.666998386383057,
      "learning_rate": 2.8499999999999998e-05,
      "loss": 0.517,
      "step": 20
    },
    {
      "epoch": 0.01105379513633014,
      "grad_norm": 5.241199016571045,
      "learning_rate": 4.3499999999999993e-05,
      "loss": 0.4514,
      "step": 30
    },
    {
      "epoch": 0.014738393515106854,
      "grad_norm": 5.938083171844482,
      "learning_rate": 5.85e-05,
      "loss": 0.4963,
      "step": 40
    },
    {
      "epoch": 0.018422991893883568,
      "grad_norm": 3.599522590637207,
      "learning_rate": 7.35e-05,
      "loss": 0.4226,
      "step": 50
    },
    {
      "epoch": 0.02210759027266028,
      "grad_norm": 4.397755146026611,
      "learning_rate": 8.849999999999998e-05,
      "loss": 0.4117,
      "step": 60
    },
    {
      "epoch": 0.025792188651436992,
      "grad_norm": 11.364407539367676,
      "learning_rate": 0.00010349999999999998,
      "loss": 0.3636,
      "step": 70
    },
    {
      "epoch": 0.029476787030213707,
      "grad_norm": 3.12703800201416,
      "learning_rate": 0.0001185,
      "loss": 0.3726,
      "step": 80
    },
    {
      "epoch": 0.03316138540899042,
      "grad_norm": 2.9937469959259033,
      "learning_rate": 0.0001335,
      "loss": 0.3421,
      "step": 90
    },
    {
      "epoch": 0.036845983787767135,
      "grad_norm": 2.8763785362243652,
      "learning_rate": 0.00014849999999999998,
      "loss": 0.4045,
      "step": 100
    },
    {
      "epoch": 0.040530582166543844,
      "grad_norm": 4.576082706451416,
      "learning_rate": 0.0001635,
      "loss": 0.3629,
      "step": 110
    },
    {
      "epoch": 0.04421518054532056,
      "grad_norm": 4.948099613189697,
      "learning_rate": 0.00017849999999999997,
      "loss": 0.394,
      "step": 120
    },
    {
      "epoch": 0.04716285924834193,
      "eval_has_safety_issue_roc_auc": 0.918227021974626,
      "eval_loss": 0.40556982159614563,
      "eval_mean_roc_auc": 0.918227021974626,
      "eval_runtime": 489.5112,
      "eval_samples_per_second": 76.035,
      "eval_steps_per_second": 2.378,
      "step": 128
    },
    {
      "epoch": 0.047899778924097275,
      "grad_norm": 2.6524598598480225,
      "learning_rate": 0.0001935,
      "loss": 0.3734,
      "step": 130
    },
    {
      "epoch": 0.051584377302873984,
      "grad_norm": 4.847095966339111,
      "learning_rate": 0.00020849999999999997,
      "loss": 0.3707,
      "step": 140
    },
    {
      "epoch": 0.0552689756816507,
      "grad_norm": 2.1986277103424072,
      "learning_rate": 0.00022349999999999998,
      "loss": 0.3914,
      "step": 150
    },
    {
      "epoch": 0.058953574060427415,
      "grad_norm": 1.9227941036224365,
      "learning_rate": 0.0002385,
      "loss": 0.3834,
      "step": 160
    },
    {
      "epoch": 0.06263817243920412,
      "grad_norm": 1.3873552083969116,
      "learning_rate": 0.0002535,
      "loss": 0.2576,
      "step": 170
    },
    {
      "epoch": 0.06632277081798084,
      "grad_norm": 4.532320022583008,
      "learning_rate": 0.00026849999999999997,
      "loss": 0.4172,
      "step": 180
    },
    {
      "epoch": 0.07000736919675755,
      "grad_norm": 2.5069925785064697,
      "learning_rate": 0.00028349999999999995,
      "loss": 0.4712,
      "step": 190
    },
    {
      "epoch": 0.07369196757553427,
      "grad_norm": 1.9502817392349243,
      "learning_rate": 0.0002985,
      "loss": 0.3573,
      "step": 200
    },
    {
      "epoch": 0.07737656595431099,
      "grad_norm": 6.129453182220459,
      "learning_rate": 0.00029994004614753843,
      "loss": 0.2982,
      "step": 210
    },
    {
      "epoch": 0.08106116433308769,
      "grad_norm": 4.473495006561279,
      "learning_rate": 0.00029973285979173177,
      "loss": 0.4217,
      "step": 220
    },
    {
      "epoch": 0.0847457627118644,
      "grad_norm": 1.3703476190567017,
      "learning_rate": 0.0002993779051807778,
      "loss": 0.3758,
      "step": 230
    },
    {
      "epoch": 0.08843036109064112,
      "grad_norm": 1.4541234970092773,
      "learning_rate": 0.00029887553261202354,
      "loss": 0.3863,
      "step": 240
    },
    {
      "epoch": 0.09211495946941783,
      "grad_norm": 1.872299075126648,
      "learning_rate": 0.00029822623786654207,
      "loss": 0.3525,
      "step": 250
    },
    {
      "epoch": 0.09432571849668386,
      "eval_has_safety_issue_roc_auc": 0.9362037324386603,
      "eval_loss": 0.33471250534057617,
      "eval_mean_roc_auc": 0.9362037324386603,
      "eval_runtime": 487.3367,
      "eval_samples_per_second": 76.374,
      "eval_steps_per_second": 2.388,
      "step": 256
    },
    {
      "epoch": 0.09579955784819455,
      "grad_norm": 2.644226312637329,
      "learning_rate": 0.0002974306617198568,
      "loss": 0.3905,
      "step": 260
    },
    {
      "epoch": 0.09948415622697127,
      "grad_norm": 2.3409223556518555,
      "learning_rate": 0.0002964895893095737,
      "loss": 0.3515,
      "step": 270
    },
    {
      "epoch": 0.10316875460574797,
      "grad_norm": 2.0618879795074463,
      "learning_rate": 0.00029540394936054435,
      "loss": 0.3592,
      "step": 280
    },
    {
      "epoch": 0.10685335298452468,
      "grad_norm": 0.9815084338188171,
      "learning_rate": 0.00029417481326832776,
      "loss": 0.3944,
      "step": 290
    },
    {
      "epoch": 0.1105379513633014,
      "grad_norm": 4.9908013343811035,
      "learning_rate": 0.00029280339404185146,
      "loss": 0.4092,
      "step": 300
    },
    {
      "epoch": 0.11422254974207811,
      "grad_norm": 0.6844871640205383,
      "learning_rate": 0.00029129104510631853,
      "loss": 0.4466,
      "step": 310
    },
    {
      "epoch": 0.11790714812085483,
      "grad_norm": 1.8045854568481445,
      "learning_rate": 0.00028963925896754035,
      "loss": 0.3698,
      "step": 320
    },
    {
      "epoch": 0.12159174649963155,
      "grad_norm": 4.337238788604736,
      "learning_rate": 0.00028784966573901314,
      "loss": 0.3892,
      "step": 330
    },
    {
      "epoch": 0.12527634487840825,
      "grad_norm": 0.8228144645690918,
      "learning_rate": 0.0002859240315331935,
      "loss": 0.417,
      "step": 340
    },
    {
      "epoch": 0.12896094325718496,
      "grad_norm": 1.259918212890625,
      "learning_rate": 0.00028386425671855764,
      "loss": 0.3393,
      "step": 350
    },
    {
      "epoch": 0.13264554163596168,
      "grad_norm": 4.035144805908203,
      "learning_rate": 0.00028167237404416826,
      "loss": 0.2893,
      "step": 360
    },
    {
      "epoch": 0.1363301400147384,
      "grad_norm": 1.1749995946884155,
      "learning_rate": 0.0002793505466335956,
      "loss": 0.4133,
      "step": 370
    },
    {
      "epoch": 0.1400147383935151,
      "grad_norm": 1.3367811441421509,
      "learning_rate": 0.0002769010658501763,
      "loss": 0.3775,
      "step": 380
    },
    {
      "epoch": 0.1414885777450258,
      "eval_has_safety_issue_roc_auc": 0.9363893661137548,
      "eval_loss": 0.3488316535949707,
      "eval_mean_roc_auc": 0.9363893661137548,
      "eval_runtime": 487.0696,
      "eval_samples_per_second": 76.416,
      "eval_steps_per_second": 2.39,
      "step": 384
    },
    {
      "epoch": 0.14369933677229182,
      "grad_norm": 1.7076958417892456,
      "learning_rate": 0.00027432634903571426,
      "loss": 0.4061,
      "step": 390
    },
    {
      "epoch": 0.14738393515106854,
      "grad_norm": 1.1982675790786743,
      "learning_rate": 0.000271628937124856,
      "loss": 0.3591,
      "step": 400
    },
    {
      "epoch": 0.15106853352984526,
      "grad_norm": 0.9970278143882751,
      "learning_rate": 0.000268811492137495,
      "loss": 0.3556,
      "step": 410
    },
    {
      "epoch": 0.15475313190862197,
      "grad_norm": 3.2783939838409424,
      "learning_rate": 0.0002658767945516796,
      "loss": 0.3119,
      "step": 420
    },
    {
      "epoch": 0.1584377302873987,
      "grad_norm": 0.9100527763366699,
      "learning_rate": 0.0002628277405596167,
      "loss": 0.3235,
      "step": 430
    },
    {
      "epoch": 0.16212232866617537,
      "grad_norm": 2.0034751892089844,
      "learning_rate": 0.0002596673392094796,
      "loss": 0.3409,
      "step": 440
    },
    {
      "epoch": 0.1658069270449521,
      "grad_norm": 2.4519588947296143,
      "learning_rate": 0.00025639870943584104,
      "loss": 0.3431,
      "step": 450
    },
    {
      "epoch": 0.1694915254237288,
      "grad_norm": 3.1098690032958984,
      "learning_rate": 0.0002530250769816612,
      "loss": 0.3966,
      "step": 460
    },
    {
      "epoch": 0.17317612380250552,
      "grad_norm": 1.0801705121994019,
      "learning_rate": 0.0002495497712148688,
      "loss": 0.3026,
      "step": 470
    },
    {
      "epoch": 0.17686072218128224,
      "grad_norm": 2.5059814453125,
      "learning_rate": 0.00024597622184267673,
      "loss": 0.3053,
      "step": 480
    },
    {
      "epoch": 0.18054532056005895,
      "grad_norm": 4.749985694885254,
      "learning_rate": 0.00024230795552687568,
      "loss": 0.3585,
      "step": 490
    },
    {
      "epoch": 0.18422991893883567,
      "grad_norm": 2.033231735229492,
      "learning_rate": 0.00023854859240344416,
      "loss": 0.3503,
      "step": 500
    },
    {
      "epoch": 0.18791451731761238,
      "grad_norm": 1.2591739892959595,
      "learning_rate": 0.00023470184250991156,
      "loss": 0.3347,
      "step": 510
    },
    {
      "epoch": 0.18865143699336773,
      "eval_has_safety_issue_roc_auc": 0.941105104803345,
      "eval_loss": 0.3277105987071991,
      "eval_mean_roc_auc": 0.941105104803345,
      "eval_runtime": 487.352,
      "eval_samples_per_second": 76.372,
      "eval_steps_per_second": 2.388,
      "step": 512
    },
    {
      "epoch": 0.1915991156963891,
      "grad_norm": 3.9085240364074707,
      "learning_rate": 0.00023077150212399899,
      "loss": 0.3273,
      "step": 520
    },
    {
      "epoch": 0.19528371407516582,
      "grad_norm": 0.7869892120361328,
      "learning_rate": 0.00022676145001715174,
      "loss": 0.3515,
      "step": 530
    },
    {
      "epoch": 0.19896831245394253,
      "grad_norm": 0.9839105606079102,
      "learning_rate": 0.00022267564362665968,
      "loss": 0.3073,
      "step": 540
    },
    {
      "epoch": 0.20265291083271925,
      "grad_norm": 0.618803083896637,
      "learning_rate": 0.0002185181151501449,
      "loss": 0.3734,
      "step": 550
    },
    {
      "epoch": 0.20633750921149593,
      "grad_norm": 1.2680476903915405,
      "learning_rate": 0.00021429296756626925,
      "loss": 0.3323,
      "step": 560
    },
    {
      "epoch": 0.21002210759027265,
      "grad_norm": 1.8254988193511963,
      "learning_rate": 0.00021000437058558968,
      "loss": 0.35,
      "step": 570
    },
    {
      "epoch": 0.21370670596904937,
      "grad_norm": 1.5576568841934204,
      "learning_rate": 0.00020565655653555763,
      "loss": 0.2629,
      "step": 580
    },
    {
      "epoch": 0.21739130434782608,
      "grad_norm": 0.9699956178665161,
      "learning_rate": 0.0002012538161837225,
      "loss": 0.2764,
      "step": 590
    },
    {
      "epoch": 0.2210759027266028,
      "grad_norm": 0.5201606154441833,
      "learning_rate": 0.00019680049450326222,
      "loss": 0.267,
      "step": 600
    },
    {
      "epoch": 0.2247605011053795,
      "grad_norm": 0.876549243927002,
      "learning_rate": 0.00019230098638501938,
      "loss": 0.3421,
      "step": 610
    },
    {
      "epoch": 0.22844509948415623,
      "grad_norm": 0.6652014255523682,
      "learning_rate": 0.00018775973230027458,
      "loss": 0.3823,
      "step": 620
    },
    {
      "epoch": 0.23212969786293294,
      "grad_norm": 1.2416579723358154,
      "learning_rate": 0.00018318121391853708,
      "loss": 0.2984,
      "step": 630
    },
    {
      "epoch": 0.23581429624170966,
      "grad_norm": 0.8337903618812561,
      "learning_rate": 0.00017856994968467845,
      "loss": 0.2969,
      "step": 640
    },
    {
      "epoch": 0.23581429624170966,
      "eval_has_safety_issue_roc_auc": 0.9458881980708542,
      "eval_loss": 0.31154128909111023,
      "eval_mean_roc_auc": 0.9458881980708542,
      "eval_runtime": 487.6344,
      "eval_samples_per_second": 76.328,
      "eval_steps_per_second": 2.387,
      "step": 640
    },
    {
      "epoch": 0.23949889462048637,
      "grad_norm": 0.8940573930740356,
      "learning_rate": 0.00017393049035977292,
      "loss": 0.3488,
      "step": 650
    },
    {
      "epoch": 0.2431834929992631,
      "grad_norm": 1.5384459495544434,
      "learning_rate": 0.00016926741453004545,
      "loss": 0.3101,
      "step": 660
    },
    {
      "epoch": 0.2468680913780398,
      "grad_norm": 1.3179084062576294,
      "learning_rate": 0.00016458532408835993,
      "loss": 0.2165,
      "step": 670
    },
    {
      "epoch": 0.2505526897568165,
      "grad_norm": 1.6649147272109985,
      "learning_rate": 0.00015988883969270665,
      "loss": 0.2676,
      "step": 680
    },
    {
      "epoch": 0.2542372881355932,
      "grad_norm": 1.43660569190979,
      "learning_rate": 0.00015518259620617085,
      "loss": 0.3425,
      "step": 690
    },
    {
      "epoch": 0.2579218865143699,
      "grad_norm": 1.0876473188400269,
      "learning_rate": 0.00015047123812288193,
      "loss": 0.3139,
      "step": 700
    },
    {
      "epoch": 0.26160648489314664,
      "grad_norm": 1.0270347595214844,
      "learning_rate": 0.000145759414984459,
      "loss": 0.3059,
      "step": 710
    },
    {
      "epoch": 0.26529108327192336,
      "grad_norm": 0.6378604173660278,
      "learning_rate": 0.00014105177679147446,
      "loss": 0.2809,
      "step": 720
    },
    {
      "epoch": 0.26897568165070007,
      "grad_norm": 0.8198941349983215,
      "learning_rate": 0.00013635296941446449,
      "loss": 0.3358,
      "step": 730
    },
    {
      "epoch": 0.2726602800294768,
      "grad_norm": 0.7575820684432983,
      "learning_rate": 0.00013166763000901655,
      "loss": 0.3009,
      "step": 740
    },
    {
      "epoch": 0.2763448784082535,
      "grad_norm": 1.4956635236740112,
      "learning_rate": 0.00012700038243945594,
      "loss": 0.3287,
      "step": 750
    },
    {
      "epoch": 0.2800294767870302,
      "grad_norm": 0.945459246635437,
      "learning_rate": 0.00012235583271565003,
      "loss": 0.3243,
      "step": 760
    },
    {
      "epoch": 0.2829771554900516,
      "eval_has_safety_issue_roc_auc": 0.9507512976389587,
      "eval_loss": 0.2935161590576172,
      "eval_mean_roc_auc": 0.9507512976389587,
      "eval_runtime": 487.4239,
      "eval_samples_per_second": 76.361,
      "eval_steps_per_second": 2.388,
      "step": 768
    },
    {
      "epoch": 0.28371407516580693,
      "grad_norm": 1.6803070306777954,
      "learning_rate": 0.00011773856444743296,
      "loss": 0.2998,
      "step": 770
    },
    {
      "epoch": 0.28739867354458365,
      "grad_norm": 0.8402862548828125,
      "learning_rate": 0.00011315313432113607,
      "loss": 0.2805,
      "step": 780
    },
    {
      "epoch": 0.29108327192336036,
      "grad_norm": 0.950634241104126,
      "learning_rate": 0.00010860406760268816,
      "loss": 0.3111,
      "step": 790
    },
    {
      "epoch": 0.2947678703021371,
      "grad_norm": 0.8810547590255737,
      "learning_rate": 0.00010409585367172489,
      "loss": 0.2858,
      "step": 800
    },
    {
      "epoch": 0.2984524686809138,
      "grad_norm": 0.8042846322059631,
      "learning_rate": 9.96329415911129e-05,
      "loss": 0.3156,
      "step": 810
    },
    {
      "epoch": 0.3021370670596905,
      "grad_norm": 0.6347734332084656,
      "learning_rate": 9.521973571626184e-05,
      "loss": 0.347,
      "step": 820
    },
    {
      "epoch": 0.3058216654384672,
      "grad_norm": 1.017738938331604,
      "learning_rate": 9.086059134855733e-05,
      "loss": 0.2554,
      "step": 830
    },
    {
      "epoch": 0.30950626381724394,
      "grad_norm": 0.5769331455230713,
      "learning_rate": 8.655981043720452e-05,
      "loss": 0.2573,
      "step": 840
    },
    {
      "epoch": 0.31319086219602066,
      "grad_norm": 1.6331214904785156,
      "learning_rate": 8.232163733372322e-05,
      "loss": 0.3412,
      "step": 850
    },
    {
      "epoch": 0.3168754605747974,
      "grad_norm": 1.2017263174057007,
      "learning_rate": 7.815025460328584e-05,
      "loss": 0.245,
      "step": 860
    },
    {
      "epoch": 0.32056005895357403,
      "grad_norm": 0.7730036377906799,
      "learning_rate": 7.404977889703008e-05,
      "loss": 0.3052,
      "step": 870
    },
    {
      "epoch": 0.32424465733235075,
      "grad_norm": 0.643957793712616,
      "learning_rate": 7.00242568894217e-05,
      "loss": 0.2292,
      "step": 880
    },
    {
      "epoch": 0.32792925571112747,
      "grad_norm": 1.9052222967147827,
      "learning_rate": 6.607766128467497e-05,
      "loss": 0.3792,
      "step": 890
    },
    {
      "epoch": 0.3301400147383935,
      "eval_has_safety_issue_roc_auc": 0.9519071421594945,
      "eval_loss": 0.29572874307632446,
      "eval_mean_roc_auc": 0.9519071421594945,
      "eval_runtime": 487.4395,
      "eval_samples_per_second": 76.358,
      "eval_steps_per_second": 2.388,
      "step": 896
    },
    {
      "epoch": 0.3316138540899042,
      "grad_norm": 0.7808663249015808,
      "learning_rate": 6.221388689617348e-05,
      "loss": 0.2955,
      "step": 900
    },
    {
      "epoch": 0.3352984524686809,
      "grad_norm": 0.7185715436935425,
      "learning_rate": 5.843674680275963e-05,
      "loss": 0.2523,
      "step": 910
    },
    {
      "epoch": 0.3389830508474576,
      "grad_norm": 1.606952428817749,
      "learning_rate": 5.474996858568593e-05,
      "loss": 0.3108,
      "step": 920
    },
    {
      "epoch": 0.3426676492262343,
      "grad_norm": 0.7948682308197021,
      "learning_rate": 5.115719064994245e-05,
      "loss": 0.2796,
      "step": 930
    },
    {
      "epoch": 0.34635224760501104,
      "grad_norm": 0.7030922770500183,
      "learning_rate": 4.766195863359054e-05,
      "loss": 0.2397,
      "step": 940
    },
    {
      "epoch": 0.35003684598378776,
      "grad_norm": 1.6789747476577759,
      "learning_rate": 4.426772190864578e-05,
      "loss": 0.2918,
      "step": 950
    },
    {
      "epoch": 0.3537214443625645,
      "grad_norm": 1.2274649143218994,
      "learning_rate": 4.0977830176964584e-05,
      "loss": 0.2523,
      "step": 960
    },
    {
      "epoch": 0.3574060427413412,
      "grad_norm": 0.8976671695709229,
      "learning_rate": 3.77955301644926e-05,
      "loss": 0.2475,
      "step": 970
    },
    {
      "epoch": 0.3610906411201179,
      "grad_norm": 1.8330504894256592,
      "learning_rate": 3.472396241713854e-05,
      "loss": 0.319,
      "step": 980
    },
    {
      "epoch": 0.3647752394988946,
      "grad_norm": 0.5846236944198608,
      "learning_rate": 3.1766158201434e-05,
      "loss": 0.2242,
      "step": 990
    },
    {
      "epoch": 0.36845983787767134,
      "grad_norm": 1.2076009511947632,
      "learning_rate": 2.8925036513039986e-05,
      "loss": 0.306,
      "step": 1000
    },
    {
      "epoch": 0.37214443625644805,
      "grad_norm": 0.7948538064956665,
      "learning_rate": 2.620340119605006e-05,
      "loss": 0.2373,
      "step": 1010
    },
    {
      "epoch": 0.37582903463522477,
      "grad_norm": 2.274285316467285,
      "learning_rate": 2.360393817593514e-05,
      "loss": 0.2993,
      "step": 1020
    },
    {
      "epoch": 0.37730287398673545,
      "eval_has_safety_issue_roc_auc": 0.9530514004567643,
      "eval_loss": 0.2790428400039673,
      "eval_mean_roc_auc": 0.9530514004567643,
      "eval_runtime": 487.3953,
      "eval_samples_per_second": 76.365,
      "eval_steps_per_second": 2.388,
      "step": 1024
    }
  ],
  "logging_steps": 10,
  "max_steps": 3200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 128,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.466350275271066e+16,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}