| { | |
| "best_metric": 0.8023715415019763, | |
| "best_model_checkpoint": "./toxicity_c_202201181030/checkpoint-3000", | |
| "epoch": 1.2414895617829602, | |
| "global_step": 3300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.94e-05, | |
| "loss": 0.2336, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_accuracy": 0.9445713034502174, | |
| "eval_f1": 0.6696495152870993, | |
| "eval_loss": 0.1531878262758255, | |
| "eval_precision": 0.8655421686746988, | |
| "eval_recall": 0.5460626330191548, | |
| "eval_runtime": 209.3715, | |
| "eval_samples_per_second": 152.69, | |
| "eval_steps_per_second": 19.09, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 2.88e-05, | |
| "loss": 0.1549, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_accuracy": 0.9456973943507773, | |
| "eval_f1": 0.6756352765321375, | |
| "eval_loss": 0.1629406213760376, | |
| "eval_precision": 0.8763936015511391, | |
| "eval_recall": 0.5497111584068106, | |
| "eval_runtime": 209.809, | |
| "eval_samples_per_second": 152.372, | |
| "eval_steps_per_second": 19.051, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.8199999999999998e-05, | |
| "loss": 0.158, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_accuracy": 0.949920235227877, | |
| "eval_f1": 0.7275820997107366, | |
| "eval_loss": 0.13407668471336365, | |
| "eval_precision": 0.8261205564142194, | |
| "eval_recall": 0.6500456065673457, | |
| "eval_runtime": 210.4551, | |
| "eval_samples_per_second": 151.904, | |
| "eval_steps_per_second": 18.992, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.7600000000000003e-05, | |
| "loss": 0.1321, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 0.9515468109731302, | |
| "eval_f1": 0.7355301348813386, | |
| "eval_loss": 0.1435689479112625, | |
| "eval_precision": 0.8387850467289719, | |
| "eval_recall": 0.6549103070842202, | |
| "eval_runtime": 209.6366, | |
| "eval_samples_per_second": 152.497, | |
| "eval_steps_per_second": 19.066, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": 0.1408, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.9473865307016172, | |
| "eval_f1": 0.7431276725717777, | |
| "eval_loss": 0.13682132959365845, | |
| "eval_precision": 0.7465480208652961, | |
| "eval_recall": 0.739738522347218, | |
| "eval_runtime": 210.8965, | |
| "eval_samples_per_second": 151.586, | |
| "eval_steps_per_second": 18.952, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 2.64e-05, | |
| "loss": 0.1385, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_accuracy": 0.9507960837060903, | |
| "eval_f1": 0.7527118377613583, | |
| "eval_loss": 0.12854768335819244, | |
| "eval_precision": 0.779296875, | |
| "eval_recall": 0.7278808148373366, | |
| "eval_runtime": 210.2245, | |
| "eval_samples_per_second": 152.071, | |
| "eval_steps_per_second": 19.013, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 2.58e-05, | |
| "loss": 0.1313, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_accuracy": 0.9529857049016235, | |
| "eval_f1": 0.7370078740157481, | |
| "eval_loss": 0.14518482983112335, | |
| "eval_precision": 0.8680956306677658, | |
| "eval_recall": 0.6403162055335968, | |
| "eval_runtime": 210.6173, | |
| "eval_samples_per_second": 151.787, | |
| "eval_steps_per_second": 18.978, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 2.52e-05, | |
| "loss": 0.1399, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_accuracy": 0.949826394319497, | |
| "eval_f1": 0.7610250297973777, | |
| "eval_loss": 0.13412030041217804, | |
| "eval_precision": 0.7461291264972246, | |
| "eval_recall": 0.7765278200060809, | |
| "eval_runtime": 209.9319, | |
| "eval_samples_per_second": 152.283, | |
| "eval_steps_per_second": 19.04, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 2.4599999999999998e-05, | |
| "loss": 0.1272, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_accuracy": 0.9536425912602834, | |
| "eval_f1": 0.7640878701050622, | |
| "eval_loss": 0.12946395576000214, | |
| "eval_precision": 0.8018710324089542, | |
| "eval_recall": 0.7297050775311645, | |
| "eval_runtime": 210.1105, | |
| "eval_samples_per_second": 152.153, | |
| "eval_steps_per_second": 19.023, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.1324, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_accuracy": 0.9478244549407239, | |
| "eval_f1": 0.7562116340251388, | |
| "eval_loss": 0.14241644740104675, | |
| "eval_precision": 0.7281170841542358, | |
| "eval_recall": 0.7865612648221344, | |
| "eval_runtime": 210.5453, | |
| "eval_samples_per_second": 151.839, | |
| "eval_steps_per_second": 18.984, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.3400000000000003e-05, | |
| "loss": 0.1198, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 0.9530482655072101, | |
| "eval_f1": 0.7681137030743087, | |
| "eval_loss": 0.1265845000743866, | |
| "eval_precision": 0.7807788944723618, | |
| "eval_recall": 0.7558528428093646, | |
| "eval_runtime": 210.173, | |
| "eval_samples_per_second": 152.108, | |
| "eval_steps_per_second": 19.018, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.2800000000000002e-05, | |
| "loss": 0.1232, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_accuracy": 0.9558634927586099, | |
| "eval_f1": 0.7611308616895208, | |
| "eval_loss": 0.11988817900419235, | |
| "eval_precision": 0.8586707410236822, | |
| "eval_recall": 0.6834904226208574, | |
| "eval_runtime": 209.5961, | |
| "eval_samples_per_second": 152.527, | |
| "eval_steps_per_second": 19.07, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.22e-05, | |
| "loss": 0.1239, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.9564578185116832, | |
| "eval_f1": 0.7667560321715818, | |
| "eval_loss": 0.12424681335687637, | |
| "eval_precision": 0.8540500186636805, | |
| "eval_recall": 0.6956521739130435, | |
| "eval_runtime": 210.1306, | |
| "eval_samples_per_second": 152.139, | |
| "eval_steps_per_second": 19.022, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.16e-05, | |
| "loss": 0.1236, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.9570834245675498, | |
| "eval_f1": 0.7704918032786885, | |
| "eval_loss": 0.1350642293691635, | |
| "eval_precision": 0.8564522127184827, | |
| "eval_recall": 0.7002128306476133, | |
| "eval_runtime": 210.8014, | |
| "eval_samples_per_second": 151.655, | |
| "eval_steps_per_second": 18.961, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.1e-05, | |
| "loss": 0.1254, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_accuracy": 0.9575526291094498, | |
| "eval_f1": 0.7775774463202754, | |
| "eval_loss": 0.11828587204217911, | |
| "eval_precision": 0.8435277382645804, | |
| "eval_recall": 0.7211918516266342, | |
| "eval_runtime": 209.8683, | |
| "eval_samples_per_second": 152.329, | |
| "eval_steps_per_second": 19.045, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.04e-05, | |
| "loss": 0.1252, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_accuracy": 0.9568644624479965, | |
| "eval_f1": 0.7638294228463779, | |
| "eval_loss": 0.12166079878807068, | |
| "eval_precision": 0.8745098039215686, | |
| "eval_recall": 0.6780176345393737, | |
| "eval_runtime": 210.147, | |
| "eval_samples_per_second": 152.127, | |
| "eval_steps_per_second": 19.02, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.98e-05, | |
| "loss": 0.1256, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.9574275078982765, | |
| "eval_f1": 0.7709910819451455, | |
| "eval_loss": 0.11569029092788696, | |
| "eval_precision": 0.8632253202712886, | |
| "eval_recall": 0.6965643052599574, | |
| "eval_runtime": 210.5963, | |
| "eval_samples_per_second": 151.802, | |
| "eval_steps_per_second": 18.979, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.9200000000000003e-05, | |
| "loss": 0.1324, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_accuracy": 0.95426819731615, | |
| "eval_f1": 0.7747303543913714, | |
| "eval_loss": 0.12124760448932648, | |
| "eval_precision": 0.7853795688847235, | |
| "eval_recall": 0.7643660687138948, | |
| "eval_runtime": 210.0339, | |
| "eval_samples_per_second": 152.209, | |
| "eval_steps_per_second": 19.03, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.86e-05, | |
| "loss": 0.1192, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_accuracy": 0.9576777503206231, | |
| "eval_f1": 0.7719534805326143, | |
| "eval_loss": 0.12099746614694595, | |
| "eval_precision": 0.8661119515885023, | |
| "eval_recall": 0.6962602614776527, | |
| "eval_runtime": 209.8273, | |
| "eval_samples_per_second": 152.359, | |
| "eval_steps_per_second": 19.049, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.1234, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_accuracy": 0.9573023866871031, | |
| "eval_f1": 0.781074578989575, | |
| "eval_loss": 0.11586691439151764, | |
| "eval_precision": 0.8265444670739986, | |
| "eval_recall": 0.7403466099118273, | |
| "eval_runtime": 209.7848, | |
| "eval_samples_per_second": 152.39, | |
| "eval_steps_per_second": 19.053, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.74e-05, | |
| "loss": 0.1129, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.9564578185116832, | |
| "eval_f1": 0.7782733354571519, | |
| "eval_loss": 0.12270316481590271, | |
| "eval_precision": 0.8173302107728337, | |
| "eval_recall": 0.7427789601702646, | |
| "eval_runtime": 209.6077, | |
| "eval_samples_per_second": 152.518, | |
| "eval_steps_per_second": 19.069, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.6800000000000002e-05, | |
| "loss": 0.1244, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.9576777503206231, | |
| "eval_f1": 0.776769509981851, | |
| "eval_loss": 0.11879286915063858, | |
| "eval_precision": 0.8492063492063492, | |
| "eval_recall": 0.7157190635451505, | |
| "eval_runtime": 209.8694, | |
| "eval_samples_per_second": 152.328, | |
| "eval_steps_per_second": 19.045, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.62e-05, | |
| "loss": 0.1198, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.9573336669898964, | |
| "eval_f1": 0.7855345911949686, | |
| "eval_loss": 0.12149158120155334, | |
| "eval_precision": 0.8134158254640182, | |
| "eval_recall": 0.7595013681970204, | |
| "eval_runtime": 209.8421, | |
| "eval_samples_per_second": 152.348, | |
| "eval_steps_per_second": 19.048, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.56e-05, | |
| "loss": 0.1167, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 0.9582095154681097, | |
| "eval_f1": 0.7829054273643159, | |
| "eval_loss": 0.11379627883434296, | |
| "eval_precision": 0.8408376963350785, | |
| "eval_recall": 0.7324414715719063, | |
| "eval_runtime": 210.0964, | |
| "eval_samples_per_second": 152.163, | |
| "eval_steps_per_second": 19.025, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.1162, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.9579592730457631, | |
| "eval_f1": 0.7870722433460076, | |
| "eval_loss": 0.11324156820774078, | |
| "eval_precision": 0.8217002977174992, | |
| "eval_recall": 0.7552447552447552, | |
| "eval_runtime": 210.2617, | |
| "eval_samples_per_second": 152.044, | |
| "eval_steps_per_second": 19.01, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.44e-05, | |
| "loss": 0.1202, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.9577403109262098, | |
| "eval_f1": 0.785385226370135, | |
| "eval_loss": 0.11547254770994186, | |
| "eval_precision": 0.8223552894211577, | |
| "eval_recall": 0.7515962298570994, | |
| "eval_runtime": 209.2873, | |
| "eval_samples_per_second": 152.752, | |
| "eval_steps_per_second": 19.098, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 1.3800000000000002e-05, | |
| "loss": 0.1069, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_accuracy": 0.9589289624323564, | |
| "eval_f1": 0.7883282282766404, | |
| "eval_loss": 0.11415638774633408, | |
| "eval_precision": 0.8390528483184626, | |
| "eval_recall": 0.7433870477348739, | |
| "eval_runtime": 209.3049, | |
| "eval_samples_per_second": 152.739, | |
| "eval_steps_per_second": 19.097, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 1.32e-05, | |
| "loss": 0.0866, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_accuracy": 0.9558947730614032, | |
| "eval_f1": 0.7875866224766495, | |
| "eval_loss": 0.12708334624767303, | |
| "eval_precision": 0.7805315019408778, | |
| "eval_recall": 0.7947704469443599, | |
| "eval_runtime": 208.8539, | |
| "eval_samples_per_second": 153.069, | |
| "eval_steps_per_second": 19.138, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 1.26e-05, | |
| "loss": 0.0913, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_accuracy": 0.9557696518502299, | |
| "eval_f1": 0.7855626326963907, | |
| "eval_loss": 0.12602387368679047, | |
| "eval_precision": 0.783661119515885, | |
| "eval_recall": 0.7874733961690483, | |
| "eval_runtime": 209.1412, | |
| "eval_samples_per_second": 152.858, | |
| "eval_steps_per_second": 19.111, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.087, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_accuracy": 0.95548812912509, | |
| "eval_f1": 0.7876436352783167, | |
| "eval_loss": 0.12803590297698975, | |
| "eval_precision": 0.7734466588511137, | |
| "eval_recall": 0.8023715415019763, | |
| "eval_runtime": 209.0203, | |
| "eval_samples_per_second": 152.947, | |
| "eval_steps_per_second": 19.123, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 1.1400000000000001e-05, | |
| "loss": 0.0839, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_accuracy": 0.958991523037943, | |
| "eval_f1": 0.7922016167379933, | |
| "eval_loss": 0.12513290345668793, | |
| "eval_precision": 0.8274834437086093, | |
| "eval_recall": 0.759805411979325, | |
| "eval_runtime": 209.1866, | |
| "eval_samples_per_second": 152.825, | |
| "eval_steps_per_second": 19.107, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 1.08e-05, | |
| "loss": 0.0889, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_accuracy": 0.9583033563764898, | |
| "eval_f1": 0.7890489001424276, | |
| "eval_loss": 0.12521126866340637, | |
| "eval_precision": 0.8227722772277227, | |
| "eval_recall": 0.7579811492854971, | |
| "eval_runtime": 209.1839, | |
| "eval_samples_per_second": 152.827, | |
| "eval_steps_per_second": 19.108, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 1.02e-05, | |
| "loss": 0.0887, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_accuracy": 0.9562701366949232, | |
| "eval_f1": 0.7879890809827116, | |
| "eval_loss": 0.12457986176013947, | |
| "eval_precision": 0.7860816944024206, | |
| "eval_recall": 0.7899057464274856, | |
| "eval_runtime": 209.6152, | |
| "eval_samples_per_second": 152.513, | |
| "eval_steps_per_second": 19.068, | |
| "step": 3300 | |
| } | |
| ], | |
| "max_steps": 5000, | |
| "num_train_epochs": 2, | |
| "total_flos": 2.279247411490296e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |