{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.926393270241851, "eval_steps": 100.0, "global_step": 1180, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 5e-06, "loss": 4.1311, "step": 10 }, { "epoch": 0.17, "learning_rate": 1e-05, "loss": 4.0983, "step": 20 }, { "epoch": 0.25, "learning_rate": 1.5e-05, "loss": 3.9944, "step": 30 }, { "epoch": 0.34, "learning_rate": 2e-05, "loss": 3.8495, "step": 40 }, { "epoch": 0.42, "learning_rate": 2.5e-05, "loss": 3.5944, "step": 50 }, { "epoch": 0.5, "learning_rate": 3e-05, "loss": 3.1649, "step": 60 }, { "epoch": 0.59, "learning_rate": 3.5e-05, "loss": 3.0072, "step": 70 }, { "epoch": 0.67, "learning_rate": 4e-05, "loss": 2.9697, "step": 80 }, { "epoch": 0.76, "learning_rate": 4.5e-05, "loss": 2.8497, "step": 90 }, { "epoch": 0.84, "learning_rate": 5e-05, "loss": 2.8376, "step": 100 }, { "epoch": 0.93, "learning_rate": 4.998942375205502e-05, "loss": 2.8255, "step": 110 }, { "epoch": 1.01, "learning_rate": 4.995770395678171e-05, "loss": 2.7066, "step": 120 }, { "epoch": 1.09, "learning_rate": 4.990486745229364e-05, "loss": 2.6717, "step": 130 }, { "epoch": 1.18, "learning_rate": 4.983095894354858e-05, "loss": 2.6093, "step": 140 }, { "epoch": 1.26, "learning_rate": 4.973604096452361e-05, "loss": 2.588, "step": 150 }, { "epoch": 1.35, "learning_rate": 4.962019382530521e-05, "loss": 2.5881, "step": 160 }, { "epoch": 1.43, "learning_rate": 4.948351554413879e-05, "loss": 2.4645, "step": 170 }, { "epoch": 1.51, "learning_rate": 4.9326121764495596e-05, "loss": 2.4118, "step": 180 }, { "epoch": 1.6, "learning_rate": 4.914814565722671e-05, "loss": 2.5078, "step": 190 }, { "epoch": 1.68, "learning_rate": 4.894973780788722e-05, "loss": 2.3461, "step": 200 }, { "epoch": 1.77, "learning_rate": 4.873106608932585e-05, "loss": 2.3559, "step": 210 }, { "epoch": 1.85, "learning_rate": 4.849231551964771e-05, "loss": 2.4097, "step": 220 }, { "epoch": 1.93, "learning_rate": 4.823368810567056e-05, "loss": 2.2607, "step": 230 }, { "epoch": 2.02, "learning_rate": 4.7955402672006854e-05, "loss": 2.1208, "step": 240 }, { "epoch": 2.1, "learning_rate": 4.765769467591625e-05, "loss": 2.112, "step": 250 }, { "epoch": 2.19, "learning_rate": 4.734081600808531e-05, "loss": 2.1259, "step": 260 }, { "epoch": 2.27, "learning_rate": 4.700503477950278e-05, "loss": 2.1712, "step": 270 }, { "epoch": 2.36, "learning_rate": 4.665063509461097e-05, "loss": 1.9872, "step": 280 }, { "epoch": 2.44, "learning_rate": 4.627791681092499e-05, "loss": 1.9918, "step": 290 }, { "epoch": 2.52, "learning_rate": 4.588719528532342e-05, "loss": 2.0882, "step": 300 }, { "epoch": 2.61, "learning_rate": 4.54788011072248e-05, "loss": 1.9361, "step": 310 }, { "epoch": 2.69, "learning_rate": 4.50530798188761e-05, "loss": 1.9715, "step": 320 }, { "epoch": 2.78, "learning_rate": 4.4610391622989396e-05, "loss": 1.8659, "step": 330 }, { "epoch": 2.86, "learning_rate": 4.415111107797445e-05, "loss": 1.8453, "step": 340 }, { "epoch": 2.94, "learning_rate": 4.36756267810249e-05, "loss": 1.9347, "step": 350 }, { "epoch": 3.03, "learning_rate": 4.318434103932622e-05, "loss": 1.8471, "step": 360 }, { "epoch": 3.11, "learning_rate": 4.267766952966369e-05, "loss": 1.7312, "step": 370 }, { "epoch": 3.2, "learning_rate": 4.215604094671835e-05, "loss": 1.5693, "step": 380 }, { "epoch": 3.28, "learning_rate": 4.1619896640348445e-05, "loss": 1.7114, "step": 390 }, { "epoch": 3.36, "learning_rate": 4.1069690242163484e-05, "loss": 1.5693, "step": 400 }, { "epoch": 3.45, "learning_rate": 4.05058872817065e-05, "loss": 1.588, "step": 410 }, { "epoch": 3.53, "learning_rate": 3.9928964792569655e-05, "loss": 1.6776, "step": 420 }, { "epoch": 3.62, "learning_rate": 3.933941090877615e-05, "loss": 1.633, "step": 430 }, { "epoch": 3.7, "learning_rate": 3.873772445177015e-05, "loss": 1.5435, "step": 440 }, { "epoch": 3.79, "learning_rate": 3.8124414508364e-05, "loss": 1.522, "step": 450 }, { "epoch": 3.87, "learning_rate": 3.7500000000000003e-05, "loss": 1.4307, "step": 460 }, { "epoch": 3.95, "learning_rate": 3.686500924369101e-05, "loss": 1.6171, "step": 470 }, { "epoch": 4.04, "learning_rate": 3.621997950501156e-05, "loss": 1.482, "step": 480 }, { "epoch": 4.12, "learning_rate": 3.556545654351749e-05, "loss": 1.4552, "step": 490 }, { "epoch": 4.21, "learning_rate": 3.490199415097892e-05, "loss": 1.2726, "step": 500 }, { "epoch": 4.29, "learning_rate": 3.423015368281711e-05, "loss": 1.312, "step": 510 }, { "epoch": 4.37, "learning_rate": 3.355050358314172e-05, "loss": 1.3236, "step": 520 }, { "epoch": 4.46, "learning_rate": 3.2863618903790346e-05, "loss": 1.2786, "step": 530 }, { "epoch": 4.54, "learning_rate": 3.217008081777726e-05, "loss": 1.2587, "step": 540 }, { "epoch": 4.63, "learning_rate": 3.147047612756302e-05, "loss": 1.3509, "step": 550 }, { "epoch": 4.71, "learning_rate": 3.076539676856101e-05, "loss": 1.2632, "step": 560 }, { "epoch": 4.79, "learning_rate": 3.0055439308300952e-05, "loss": 1.2215, "step": 570 }, { "epoch": 4.88, "learning_rate": 2.9341204441673266e-05, "loss": 1.2773, "step": 580 }, { "epoch": 4.96, "learning_rate": 2.8623296482681166e-05, "loss": 1.3122, "step": 590 }, { "epoch": 5.05, "learning_rate": 2.7902322853130757e-05, "loss": 1.2999, "step": 600 }, { "epoch": 5.13, "learning_rate": 2.717889356869146e-05, "loss": 1.13, "step": 610 }, { "epoch": 5.22, "learning_rate": 2.6453620722761896e-05, "loss": 1.1283, "step": 620 }, { "epoch": 5.3, "learning_rate": 2.5727117968577784e-05, "loss": 1.0922, "step": 630 }, { "epoch": 5.38, "learning_rate": 2.5e-05, "loss": 1.0549, "step": 640 }, { "epoch": 5.47, "learning_rate": 2.4272882031422215e-05, "loss": 1.0966, "step": 650 }, { "epoch": 5.55, "learning_rate": 2.3546379277238107e-05, "loss": 1.2114, "step": 660 }, { "epoch": 5.64, "learning_rate": 2.2821106431308544e-05, "loss": 0.9685, "step": 670 }, { "epoch": 5.72, "learning_rate": 2.2097677146869242e-05, "loss": 0.9688, "step": 680 }, { "epoch": 5.8, "learning_rate": 2.1376703517318837e-05, "loss": 1.0943, "step": 690 }, { "epoch": 5.89, "learning_rate": 2.0658795558326743e-05, "loss": 0.9363, "step": 700 }, { "epoch": 5.97, "learning_rate": 1.9944560691699057e-05, "loss": 1.1054, "step": 710 }, { "epoch": 6.06, "learning_rate": 1.9234603231438995e-05, "loss": 1.0781, "step": 720 }, { "epoch": 6.14, "learning_rate": 1.852952387243698e-05, "loss": 0.8869, "step": 730 }, { "epoch": 6.23, "learning_rate": 1.7829919182222752e-05, "loss": 0.8957, "step": 740 }, { "epoch": 6.31, "learning_rate": 1.7136381096209664e-05, "loss": 1.0628, "step": 750 }, { "epoch": 6.39, "learning_rate": 1.6449496416858284e-05, "loss": 1.0005, "step": 760 }, { "epoch": 6.48, "learning_rate": 1.5769846317182893e-05, "loss": 0.9747, "step": 770 }, { "epoch": 6.56, "learning_rate": 1.509800584902108e-05, "loss": 1.0524, "step": 780 }, { "epoch": 6.65, "learning_rate": 1.443454345648252e-05, "loss": 1.0064, "step": 790 }, { "epoch": 6.73, "learning_rate": 1.3780020494988446e-05, "loss": 0.979, "step": 800 }, { "epoch": 6.81, "learning_rate": 1.313499075630899e-05, "loss": 0.9495, "step": 810 }, { "epoch": 6.9, "learning_rate": 1.2500000000000006e-05, "loss": 0.8098, "step": 820 }, { "epoch": 6.98, "learning_rate": 1.1875585491636e-05, "loss": 0.9074, "step": 830 }, { "epoch": 7.07, "learning_rate": 1.126227554822985e-05, "loss": 0.8522, "step": 840 }, { "epoch": 7.15, "learning_rate": 1.0660589091223855e-05, "loss": 0.8949, "step": 850 }, { "epoch": 7.23, "learning_rate": 1.0071035207430352e-05, "loss": 0.9344, "step": 860 }, { "epoch": 7.32, "learning_rate": 9.494112718293501e-06, "loss": 0.7258, "step": 870 }, { "epoch": 7.4, "learning_rate": 8.930309757836517e-06, "loss": 0.8315, "step": 880 }, { "epoch": 7.49, "learning_rate": 8.380103359651553e-06, "loss": 0.9103, "step": 890 }, { "epoch": 7.57, "learning_rate": 7.843959053281663e-06, "loss": 0.8161, "step": 900 }, { "epoch": 7.66, "learning_rate": 7.3223304703363135e-06, "loss": 0.8657, "step": 910 }, { "epoch": 7.74, "learning_rate": 6.815658960673782e-06, "loss": 0.8315, "step": 920 }, { "epoch": 7.82, "learning_rate": 6.324373218975105e-06, "loss": 0.8273, "step": 930 }, { "epoch": 7.91, "learning_rate": 5.848888922025553e-06, "loss": 0.8765, "step": 940 }, { "epoch": 7.99, "learning_rate": 5.389608377010608e-06, "loss": 0.8816, "step": 950 }, { "epoch": 8.08, "learning_rate": 4.946920181123904e-06, "loss": 0.8582, "step": 960 }, { "epoch": 8.16, "learning_rate": 4.521198892775203e-06, "loss": 0.7583, "step": 970 }, { "epoch": 8.24, "learning_rate": 4.112804714676594e-06, "loss": 0.862, "step": 980 }, { "epoch": 8.33, "learning_rate": 3.7220831890750067e-06, "loss": 0.8618, "step": 990 }, { "epoch": 8.41, "learning_rate": 3.3493649053890326e-06, "loss": 0.6312, "step": 1000 }, { "epoch": 8.5, "learning_rate": 2.9949652204972254e-06, "loss": 0.8316, "step": 1010 }, { "epoch": 8.58, "learning_rate": 2.659183991914696e-06, "loss": 0.8118, "step": 1020 }, { "epoch": 8.66, "learning_rate": 2.3423053240837515e-06, "loss": 0.8255, "step": 1030 }, { "epoch": 8.75, "learning_rate": 2.044597327993153e-06, "loss": 0.7131, "step": 1040 }, { "epoch": 8.83, "learning_rate": 1.7663118943294366e-06, "loss": 0.8605, "step": 1050 }, { "epoch": 8.92, "learning_rate": 1.5076844803522922e-06, "loss": 0.7849, "step": 1060 }, { "epoch": 9.0, "learning_rate": 1.2689339106741527e-06, "loss": 0.8457, "step": 1070 }, { "epoch": 9.09, "learning_rate": 1.0502621921127776e-06, "loss": 0.7074, "step": 1080 }, { "epoch": 9.17, "learning_rate": 8.51854342773295e-07, "loss": 0.7921, "step": 1090 }, { "epoch": 9.25, "learning_rate": 6.738782355044049e-07, "loss": 0.7745, "step": 1100 }, { "epoch": 9.34, "learning_rate": 5.164844558612131e-07, "loss": 0.8488, "step": 1110 }, { "epoch": 9.42, "learning_rate": 3.7980617469479953e-07, "loss": 0.7559, "step": 1120 }, { "epoch": 9.51, "learning_rate": 2.6395903547638825e-07, "loss": 0.8448, "step": 1130 }, { "epoch": 9.59, "learning_rate": 1.6904105645142444e-07, "loss": 0.7632, "step": 1140 }, { "epoch": 9.67, "learning_rate": 9.513254770636137e-08, "loss": 0.6902, "step": 1150 }, { "epoch": 9.76, "learning_rate": 4.229604321829561e-08, "loss": 0.8562, "step": 1160 }, { "epoch": 9.84, "learning_rate": 1.0576247944985018e-08, "loss": 0.8078, "step": 1170 }, { "epoch": 9.93, "learning_rate": 0.0, "loss": 0.7809, "step": 1180 }, { "epoch": 9.93, "step": 1180, "total_flos": 1.0185645586201313e+18, "train_loss": 1.5241096448090117, "train_runtime": 23566.1012, "train_samples_per_second": 1.614, "train_steps_per_second": 0.05 } ], "logging_steps": 10, "max_steps": 1180, "num_train_epochs": 10, "save_steps": 100, "total_flos": 1.0185645586201313e+18, "trial_name": null, "trial_params": null }