Instructions to use townboy/kpfbert-kdpii with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use townboy/kpfbert-kdpii with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("token-classification", model="townboy/kpfbert-kdpii")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("townboy/kpfbert-kdpii")
model = AutoModelForTokenClassification.from_pretrained("townboy/kpfbert-kdpii")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": 21448, | |
| "best_metric": 0.9201277955271566, | |
| "best_model_checkpoint": "outputs\\kpf-kdpii-ner\\checkpoint-21448", | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 21448, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.009324878776575904, | |
| "grad_norm": 4.206214904785156, | |
| "learning_rate": 4.568764568764569e-07, | |
| "loss": 4.162920837402344, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01864975755315181, | |
| "grad_norm": 4.167344570159912, | |
| "learning_rate": 9.230769230769232e-07, | |
| "loss": 4.108631591796875, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.027974636329727715, | |
| "grad_norm": 6.085664749145508, | |
| "learning_rate": 1.3892773892773895e-06, | |
| "loss": 3.964860534667969, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.03729951510630362, | |
| "grad_norm": 9.482864379882812, | |
| "learning_rate": 1.8554778554778559e-06, | |
| "loss": 3.5463021850585936, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.04662439388287952, | |
| "grad_norm": 10.59736156463623, | |
| "learning_rate": 2.321678321678322e-06, | |
| "loss": 2.1147555541992187, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.05594927265945543, | |
| "grad_norm": 6.983637809753418, | |
| "learning_rate": 2.7878787878787885e-06, | |
| "loss": 0.868929214477539, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.06527415143603134, | |
| "grad_norm": 5.8941168785095215, | |
| "learning_rate": 3.254079254079254e-06, | |
| "loss": 0.6754582214355469, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.07459903021260723, | |
| "grad_norm": 6.817300319671631, | |
| "learning_rate": 3.7202797202797207e-06, | |
| "loss": 0.6864476013183594, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.08392390898918314, | |
| "grad_norm": 3.412003517150879, | |
| "learning_rate": 4.186480186480187e-06, | |
| "loss": 0.6888908386230469, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.09324878776575904, | |
| "grad_norm": 4.4841694831848145, | |
| "learning_rate": 4.652680652680653e-06, | |
| "loss": 0.6424143981933593, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.10257366654233495, | |
| "grad_norm": 0.5611337423324585, | |
| "learning_rate": 5.118881118881119e-06, | |
| "loss": 0.6713462066650391, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.11189854531891086, | |
| "grad_norm": 2.851097583770752, | |
| "learning_rate": 5.585081585081585e-06, | |
| "loss": 0.557415771484375, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.12122342409548675, | |
| "grad_norm": 1.917966604232788, | |
| "learning_rate": 6.051282051282051e-06, | |
| "loss": 0.5822576141357422, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.13054830287206268, | |
| "grad_norm": 0.473528116941452, | |
| "learning_rate": 6.517482517482518e-06, | |
| "loss": 0.5558326721191407, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.13987318164863857, | |
| "grad_norm": 5.384398460388184, | |
| "learning_rate": 6.983682983682984e-06, | |
| "loss": 0.5160323715209961, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.14919806042521447, | |
| "grad_norm": 3.535067558288574, | |
| "learning_rate": 7.44988344988345e-06, | |
| "loss": 0.5048127365112305, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.15852293920179036, | |
| "grad_norm": 2.199418306350708, | |
| "learning_rate": 7.916083916083917e-06, | |
| "loss": 0.5268861770629882, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.1678478179783663, | |
| "grad_norm": 5.3116774559021, | |
| "learning_rate": 8.382284382284382e-06, | |
| "loss": 0.504549446105957, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.17717269675494218, | |
| "grad_norm": 5.913183689117432, | |
| "learning_rate": 8.84848484848485e-06, | |
| "loss": 0.4870978546142578, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.18649757553151808, | |
| "grad_norm": 1.0376594066619873, | |
| "learning_rate": 9.314685314685316e-06, | |
| "loss": 0.4489225769042969, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.195822454308094, | |
| "grad_norm": 6.973942279815674, | |
| "learning_rate": 9.780885780885782e-06, | |
| "loss": 0.3977284240722656, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.2051473330846699, | |
| "grad_norm": 3.3436081409454346, | |
| "learning_rate": 1.0247086247086249e-05, | |
| "loss": 0.38577865600585937, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.2144722118612458, | |
| "grad_norm": 11.863536834716797, | |
| "learning_rate": 1.0713286713286714e-05, | |
| "loss": 0.3806105422973633, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.22379709063782172, | |
| "grad_norm": 5.786470413208008, | |
| "learning_rate": 1.117948717948718e-05, | |
| "loss": 0.37290851593017577, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.2331219694143976, | |
| "grad_norm": 3.0955147743225098, | |
| "learning_rate": 1.1645687645687646e-05, | |
| "loss": 0.33695747375488283, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.2424468481909735, | |
| "grad_norm": 10.411843299865723, | |
| "learning_rate": 1.2111888111888113e-05, | |
| "loss": 0.3169963836669922, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.2517717269675494, | |
| "grad_norm": 1.3671921491622925, | |
| "learning_rate": 1.2578088578088578e-05, | |
| "loss": 0.274707088470459, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.26109660574412535, | |
| "grad_norm": 0.8266241550445557, | |
| "learning_rate": 1.3044289044289045e-05, | |
| "loss": 0.24858610153198243, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.27042148452070125, | |
| "grad_norm": 3.4048688411712646, | |
| "learning_rate": 1.351048951048951e-05, | |
| "loss": 0.2767606163024902, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.27974636329727715, | |
| "grad_norm": 5.141544342041016, | |
| "learning_rate": 1.3976689976689979e-05, | |
| "loss": 0.23059135437011719, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.28907124207385304, | |
| "grad_norm": 2.9960217475891113, | |
| "learning_rate": 1.4442890442890444e-05, | |
| "loss": 0.2576522636413574, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.29839612085042894, | |
| "grad_norm": 7.788145542144775, | |
| "learning_rate": 1.4909090909090911e-05, | |
| "loss": 0.21580177307128906, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.30772099962700483, | |
| "grad_norm": 1.1988259553909302, | |
| "learning_rate": 1.5375291375291378e-05, | |
| "loss": 0.20308712005615234, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.3170458784035807, | |
| "grad_norm": 1.3631523847579956, | |
| "learning_rate": 1.5841491841491843e-05, | |
| "loss": 0.17964347839355468, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.3263707571801567, | |
| "grad_norm": 0.7174279689788818, | |
| "learning_rate": 1.630769230769231e-05, | |
| "loss": 0.18456392288208007, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.3356956359567326, | |
| "grad_norm": 2.560981273651123, | |
| "learning_rate": 1.6773892773892774e-05, | |
| "loss": 0.16574619293212892, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.34502051473330847, | |
| "grad_norm": 0.93900465965271, | |
| "learning_rate": 1.724009324009324e-05, | |
| "loss": 0.16731924057006836, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.35434539350988437, | |
| "grad_norm": 3.983893394470215, | |
| "learning_rate": 1.7706293706293708e-05, | |
| "loss": 0.14913288116455078, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.36367027228646026, | |
| "grad_norm": 3.581357479095459, | |
| "learning_rate": 1.8172494172494176e-05, | |
| "loss": 0.1477263832092285, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.37299515106303616, | |
| "grad_norm": 1.5302927494049072, | |
| "learning_rate": 1.8638694638694642e-05, | |
| "loss": 0.139080171585083, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.3823200298396121, | |
| "grad_norm": 4.7187910079956055, | |
| "learning_rate": 1.9104895104895107e-05, | |
| "loss": 0.1504351806640625, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.391644908616188, | |
| "grad_norm": 0.5396754145622253, | |
| "learning_rate": 1.9571095571095572e-05, | |
| "loss": 0.12960749626159668, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.4009697873927639, | |
| "grad_norm": 25.248533248901367, | |
| "learning_rate": 1.9995855566492257e-05, | |
| "loss": 0.14916876792907716, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.4102946661693398, | |
| "grad_norm": 6.426814079284668, | |
| "learning_rate": 1.9944050147645447e-05, | |
| "loss": 0.1148387622833252, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.4196195449459157, | |
| "grad_norm": 2.097109317779541, | |
| "learning_rate": 1.9892244728798633e-05, | |
| "loss": 0.11595455169677735, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.4289444237224916, | |
| "grad_norm": 1.893579125404358, | |
| "learning_rate": 1.9840439309951823e-05, | |
| "loss": 0.10183592796325684, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.43826930249906754, | |
| "grad_norm": 5.724792003631592, | |
| "learning_rate": 1.9788633891105013e-05, | |
| "loss": 0.09552323341369628, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.44759418127564343, | |
| "grad_norm": 1.2438207864761353, | |
| "learning_rate": 1.97368284722582e-05, | |
| "loss": 0.0971086597442627, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.45691906005221933, | |
| "grad_norm": 0.3428623676300049, | |
| "learning_rate": 1.968502305341139e-05, | |
| "loss": 0.09295537948608398, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.4662439388287952, | |
| "grad_norm": 1.6153521537780762, | |
| "learning_rate": 1.9633217634564575e-05, | |
| "loss": 0.12522640228271484, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.4755688176053711, | |
| "grad_norm": 2.685026168823242, | |
| "learning_rate": 1.9581412215717765e-05, | |
| "loss": 0.08630707740783691, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.484893696381947, | |
| "grad_norm": 0.31434252858161926, | |
| "learning_rate": 1.9529606796870955e-05, | |
| "loss": 0.10333613395690917, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.49421857515852297, | |
| "grad_norm": 2.9683053493499756, | |
| "learning_rate": 1.9477801378024144e-05, | |
| "loss": 0.07084932804107666, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.5035434539350988, | |
| "grad_norm": 3.0713419914245605, | |
| "learning_rate": 1.942599595917733e-05, | |
| "loss": 0.08680294990539551, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.5128683327116748, | |
| "grad_norm": 3.3729348182678223, | |
| "learning_rate": 1.937419054033052e-05, | |
| "loss": 0.07697622299194336, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.5221932114882507, | |
| "grad_norm": 24.873640060424805, | |
| "learning_rate": 1.932238512148371e-05, | |
| "loss": 0.10056709289550782, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.5315180902648265, | |
| "grad_norm": 0.8206455707550049, | |
| "learning_rate": 1.9270579702636897e-05, | |
| "loss": 0.09658415794372559, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.5408429690414025, | |
| "grad_norm": 11.821130752563477, | |
| "learning_rate": 1.9218774283790087e-05, | |
| "loss": 0.06848039150238037, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.5501678478179783, | |
| "grad_norm": 0.6246572136878967, | |
| "learning_rate": 1.9166968864943273e-05, | |
| "loss": 0.08011377334594727, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.5594927265945543, | |
| "grad_norm": 3.210092306137085, | |
| "learning_rate": 1.9115163446096463e-05, | |
| "loss": 0.10572279930114746, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.5688176053711301, | |
| "grad_norm": 3.658480644226074, | |
| "learning_rate": 1.9063358027249653e-05, | |
| "loss": 0.0694641637802124, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.5781424841477061, | |
| "grad_norm": 0.8585368394851685, | |
| "learning_rate": 1.9011552608402842e-05, | |
| "loss": 0.07078958988189697, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.587467362924282, | |
| "grad_norm": 0.5600335001945496, | |
| "learning_rate": 1.895974718955603e-05, | |
| "loss": 0.07252558708190918, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.5967922417008579, | |
| "grad_norm": 4.424919605255127, | |
| "learning_rate": 1.890794177070922e-05, | |
| "loss": 0.08730278968811035, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.6061171204774338, | |
| "grad_norm": 4.6426849365234375, | |
| "learning_rate": 1.8856136351862405e-05, | |
| "loss": 0.050492286682128906, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.6154419992540097, | |
| "grad_norm": 4.4583210945129395, | |
| "learning_rate": 1.8804330933015595e-05, | |
| "loss": 0.054503369331359866, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.6247668780305856, | |
| "grad_norm": 0.5928723812103271, | |
| "learning_rate": 1.8752525514168784e-05, | |
| "loss": 0.0773204231262207, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.6340917568071615, | |
| "grad_norm": 0.8700105547904968, | |
| "learning_rate": 1.870072009532197e-05, | |
| "loss": 0.08818217277526856, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.6434166355837374, | |
| "grad_norm": 6.234158515930176, | |
| "learning_rate": 1.864891467647516e-05, | |
| "loss": 0.05500625610351562, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.6527415143603134, | |
| "grad_norm": 0.5930687785148621, | |
| "learning_rate": 1.859710925762835e-05, | |
| "loss": 0.051360769271850584, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.6620663931368892, | |
| "grad_norm": 0.048168476670980453, | |
| "learning_rate": 1.854530383878154e-05, | |
| "loss": 0.07871677875518798, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.6713912719134651, | |
| "grad_norm": 0.26890629529953003, | |
| "learning_rate": 1.8493498419934727e-05, | |
| "loss": 0.03978243350982666, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.680716150690041, | |
| "grad_norm": 1.0152816772460938, | |
| "learning_rate": 1.8441693001087916e-05, | |
| "loss": 0.06742914199829102, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.6900410294666169, | |
| "grad_norm": 5.40765905380249, | |
| "learning_rate": 1.8389887582241103e-05, | |
| "loss": 0.04563611030578613, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.6993659082431929, | |
| "grad_norm": 9.407204627990723, | |
| "learning_rate": 1.8338082163394293e-05, | |
| "loss": 0.06598632335662842, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.7086907870197687, | |
| "grad_norm": 1.0526869297027588, | |
| "learning_rate": 1.8286276744547482e-05, | |
| "loss": 0.06473824024200439, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.7180156657963447, | |
| "grad_norm": 5.696482181549072, | |
| "learning_rate": 1.823447132570067e-05, | |
| "loss": 0.0610739803314209, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.7273405445729205, | |
| "grad_norm": 0.10160894691944122, | |
| "learning_rate": 1.818266590685386e-05, | |
| "loss": 0.05340108394622803, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.7366654233494965, | |
| "grad_norm": 3.2599477767944336, | |
| "learning_rate": 1.813086048800705e-05, | |
| "loss": 0.06807507038116455, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.7459903021260723, | |
| "grad_norm": 1.383055329322815, | |
| "learning_rate": 1.8079055069160235e-05, | |
| "loss": 0.058188986778259275, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.7553151809026483, | |
| "grad_norm": 6.310545444488525, | |
| "learning_rate": 1.8027249650313424e-05, | |
| "loss": 0.06925914287567139, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.7646400596792242, | |
| "grad_norm": 2.753561496734619, | |
| "learning_rate": 1.797544423146661e-05, | |
| "loss": 0.06061097145080566, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.7739649384558001, | |
| "grad_norm": 0.06244755908846855, | |
| "learning_rate": 1.79236388126198e-05, | |
| "loss": 0.05539895057678223, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.783289817232376, | |
| "grad_norm": 1.5955125093460083, | |
| "learning_rate": 1.787183339377299e-05, | |
| "loss": 0.04949520111083985, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.7926146960089518, | |
| "grad_norm": 0.15867096185684204, | |
| "learning_rate": 1.782002797492618e-05, | |
| "loss": 0.04355106830596924, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.8019395747855278, | |
| "grad_norm": 0.03898247703909874, | |
| "learning_rate": 1.7768222556079367e-05, | |
| "loss": 0.06238871097564697, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.8112644535621036, | |
| "grad_norm": 0.10622036457061768, | |
| "learning_rate": 1.7716417137232556e-05, | |
| "loss": 0.06471785068511964, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.8205893323386796, | |
| "grad_norm": 2.5175602436065674, | |
| "learning_rate": 1.7664611718385746e-05, | |
| "loss": 0.04465628147125244, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.8299142111152555, | |
| "grad_norm": 0.6827256679534912, | |
| "learning_rate": 1.7612806299538933e-05, | |
| "loss": 0.05508995056152344, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.8392390898918314, | |
| "grad_norm": 4.929401397705078, | |
| "learning_rate": 1.7561000880692122e-05, | |
| "loss": 0.028713507652282713, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.8485639686684073, | |
| "grad_norm": 3.8355817794799805, | |
| "learning_rate": 1.750919546184531e-05, | |
| "loss": 0.054467902183532715, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.8578888474449832, | |
| "grad_norm": 0.07267450541257858, | |
| "learning_rate": 1.74573900429985e-05, | |
| "loss": 0.05773499965667725, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.8672137262215591, | |
| "grad_norm": 2.4586944580078125, | |
| "learning_rate": 1.740558462415169e-05, | |
| "loss": 0.06556248664855957, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.8765386049981351, | |
| "grad_norm": 4.859276294708252, | |
| "learning_rate": 1.7353779205304878e-05, | |
| "loss": 0.06336853981018066, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.8858634837747109, | |
| "grad_norm": 0.44299831986427307, | |
| "learning_rate": 1.7301973786458065e-05, | |
| "loss": 0.05126949310302734, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.8951883625512869, | |
| "grad_norm": 5.093299865722656, | |
| "learning_rate": 1.7250168367611254e-05, | |
| "loss": 0.05324135303497315, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.9045132413278627, | |
| "grad_norm": 1.6905597448349, | |
| "learning_rate": 1.719836294876444e-05, | |
| "loss": 0.048749656677246095, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.9138381201044387, | |
| "grad_norm": 0.30517128109931946, | |
| "learning_rate": 1.714655752991763e-05, | |
| "loss": 0.05433460712432861, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.9231629988810145, | |
| "grad_norm": 0.4588942527770996, | |
| "learning_rate": 1.709475211107082e-05, | |
| "loss": 0.04207270622253418, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.9324878776575904, | |
| "grad_norm": 0.036567509174346924, | |
| "learning_rate": 1.7042946692224007e-05, | |
| "loss": 0.05857636451721191, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.9418127564341664, | |
| "grad_norm": 3.270030975341797, | |
| "learning_rate": 1.6991141273377196e-05, | |
| "loss": 0.05915598869323731, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.9511376352107422, | |
| "grad_norm": 6.163786888122559, | |
| "learning_rate": 1.6939335854530386e-05, | |
| "loss": 0.051976222991943356, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.9604625139873182, | |
| "grad_norm": 0.16877496242523193, | |
| "learning_rate": 1.6887530435683576e-05, | |
| "loss": 0.051587677001953124, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.969787392763894, | |
| "grad_norm": 0.4458121657371521, | |
| "learning_rate": 1.6835725016836762e-05, | |
| "loss": 0.037312333583831785, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.97911227154047, | |
| "grad_norm": 0.01349574513733387, | |
| "learning_rate": 1.6783919597989952e-05, | |
| "loss": 0.04749881267547607, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.9884371503170459, | |
| "grad_norm": 0.6714735627174377, | |
| "learning_rate": 1.673211417914314e-05, | |
| "loss": 0.050176200866699217, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.9977620290936218, | |
| "grad_norm": 11.990230560302734, | |
| "learning_rate": 1.668030876029633e-05, | |
| "loss": 0.07070876598358154, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9874433707806322, | |
| "eval_f1": 0.8682108626198082, | |
| "eval_loss": 0.050337210297584534, | |
| "eval_precision": 0.8606492478226445, | |
| "eval_recall": 0.8759065269943593, | |
| "eval_runtime": 27.5818, | |
| "eval_samples_per_second": 198.065, | |
| "eval_steps_per_second": 24.763, | |
| "step": 5362 | |
| }, | |
| { | |
| "epoch": 1.0070869078701976, | |
| "grad_norm": 0.7046685814857483, | |
| "learning_rate": 1.6628503341449518e-05, | |
| "loss": 0.037335155010223386, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.0164117866467737, | |
| "grad_norm": 2.07792592048645, | |
| "learning_rate": 1.6576697922602705e-05, | |
| "loss": 0.03664821147918701, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.0257366654233495, | |
| "grad_norm": 1.8149992227554321, | |
| "learning_rate": 1.6524892503755894e-05, | |
| "loss": 0.03763864040374756, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.0350615441999254, | |
| "grad_norm": 0.8814394474029541, | |
| "learning_rate": 1.6473087084909084e-05, | |
| "loss": 0.0462725305557251, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.0443864229765012, | |
| "grad_norm": 8.331986427307129, | |
| "learning_rate": 1.642128166606227e-05, | |
| "loss": 0.025801122188568115, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.0537113017530773, | |
| "grad_norm": 0.5653894543647766, | |
| "learning_rate": 1.636947624721546e-05, | |
| "loss": 0.0302036452293396, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.063036180529653, | |
| "grad_norm": 0.1264486312866211, | |
| "learning_rate": 1.6317670828368647e-05, | |
| "loss": 0.03399224281311035, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.072361059306229, | |
| "grad_norm": 1.2637239694595337, | |
| "learning_rate": 1.6265865409521836e-05, | |
| "loss": 0.04418774604797363, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.081685938082805, | |
| "grad_norm": 5.040623188018799, | |
| "learning_rate": 1.6214059990675026e-05, | |
| "loss": 0.02509807586669922, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.0910108168593808, | |
| "grad_norm": 0.03714745491743088, | |
| "learning_rate": 1.6162254571828216e-05, | |
| "loss": 0.026492388248443605, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.1003356956359567, | |
| "grad_norm": 0.7756729125976562, | |
| "learning_rate": 1.6110449152981402e-05, | |
| "loss": 0.02965877056121826, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.1096605744125327, | |
| "grad_norm": 0.217277392745018, | |
| "learning_rate": 1.6058643734134592e-05, | |
| "loss": 0.022216553688049315, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.1189854531891086, | |
| "grad_norm": 4.127126216888428, | |
| "learning_rate": 1.6006838315287782e-05, | |
| "loss": 0.03008180618286133, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.1283103319656844, | |
| "grad_norm": 0.01144993957132101, | |
| "learning_rate": 1.595503289644097e-05, | |
| "loss": 0.04714715957641601, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.1376352107422603, | |
| "grad_norm": 0.037526026368141174, | |
| "learning_rate": 1.5903227477594158e-05, | |
| "loss": 0.04247360706329346, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.1469600895188363, | |
| "grad_norm": 0.9934174418449402, | |
| "learning_rate": 1.5851422058747345e-05, | |
| "loss": 0.02983764886856079, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.1562849682954122, | |
| "grad_norm": 0.4428967237472534, | |
| "learning_rate": 1.5799616639900534e-05, | |
| "loss": 0.027351632118225097, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.165609847071988, | |
| "grad_norm": 0.05002899840474129, | |
| "learning_rate": 1.5747811221053724e-05, | |
| "loss": 0.02518010139465332, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.174934725848564, | |
| "grad_norm": 0.18001802265644073, | |
| "learning_rate": 1.5696005802206914e-05, | |
| "loss": 0.040030746459960936, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.18425960462514, | |
| "grad_norm": 0.21795284748077393, | |
| "learning_rate": 1.56442003833601e-05, | |
| "loss": 0.03228116512298584, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.1935844834017157, | |
| "grad_norm": 0.33233147859573364, | |
| "learning_rate": 1.559239496451329e-05, | |
| "loss": 0.04077389717102051, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.2029093621782916, | |
| "grad_norm": 0.07459854334592819, | |
| "learning_rate": 1.5540589545666476e-05, | |
| "loss": 0.035812277793884274, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.2122342409548676, | |
| "grad_norm": 0.03117297776043415, | |
| "learning_rate": 1.5488784126819666e-05, | |
| "loss": 0.043056426048278806, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.2215591197314435, | |
| "grad_norm": 2.1351895332336426, | |
| "learning_rate": 1.5436978707972856e-05, | |
| "loss": 0.026498048305511473, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.2308839985080193, | |
| "grad_norm": 0.1372031569480896, | |
| "learning_rate": 1.5385173289126042e-05, | |
| "loss": 0.03637035131454468, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.2402088772845954, | |
| "grad_norm": 0.06523732095956802, | |
| "learning_rate": 1.5333367870279232e-05, | |
| "loss": 0.06686021327972412, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.2495337560611712, | |
| "grad_norm": 0.02072199061512947, | |
| "learning_rate": 1.5281562451432422e-05, | |
| "loss": 0.03420682668685913, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.258858634837747, | |
| "grad_norm": 9.351777076721191, | |
| "learning_rate": 1.522975703258561e-05, | |
| "loss": 0.026438066959381102, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.2681835136143231, | |
| "grad_norm": 0.40086886286735535, | |
| "learning_rate": 1.5177951613738796e-05, | |
| "loss": 0.046449775695800784, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.277508392390899, | |
| "grad_norm": 0.5892062783241272, | |
| "learning_rate": 1.5126146194891986e-05, | |
| "loss": 0.0309269380569458, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.2868332711674748, | |
| "grad_norm": 0.002104206709191203, | |
| "learning_rate": 1.5074340776045176e-05, | |
| "loss": 0.023648200035095216, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.2961581499440507, | |
| "grad_norm": 0.0258804801851511, | |
| "learning_rate": 1.5022535357198364e-05, | |
| "loss": 0.04035449504852295, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.3054830287206267, | |
| "grad_norm": 4.397562026977539, | |
| "learning_rate": 1.4970729938351554e-05, | |
| "loss": 0.05229721546173096, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.3148079074972026, | |
| "grad_norm": 0.007150724530220032, | |
| "learning_rate": 1.491892451950474e-05, | |
| "loss": 0.03332861661911011, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.3241327862737784, | |
| "grad_norm": 3.109645128250122, | |
| "learning_rate": 1.486711910065793e-05, | |
| "loss": 0.047160525321960446, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.3334576650503545, | |
| "grad_norm": 0.41086408495903015, | |
| "learning_rate": 1.4815313681811118e-05, | |
| "loss": 0.04890709400177002, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 1.3427825438269303, | |
| "grad_norm": 0.05025002732872963, | |
| "learning_rate": 1.4763508262964308e-05, | |
| "loss": 0.04013650417327881, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.3521074226035061, | |
| "grad_norm": 0.03028084896504879, | |
| "learning_rate": 1.4711702844117498e-05, | |
| "loss": 0.0283713960647583, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 1.361432301380082, | |
| "grad_norm": 0.031166842207312584, | |
| "learning_rate": 1.4659897425270684e-05, | |
| "loss": 0.03349567174911499, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.370757180156658, | |
| "grad_norm": 4.032196521759033, | |
| "learning_rate": 1.4608092006423872e-05, | |
| "loss": 0.029915103912353514, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.3800820589332339, | |
| "grad_norm": 3.165501594543457, | |
| "learning_rate": 1.4556286587577062e-05, | |
| "loss": 0.040238561630249026, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.3894069377098097, | |
| "grad_norm": 0.05803289636969566, | |
| "learning_rate": 1.4504481168730252e-05, | |
| "loss": 0.03673480272293091, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 1.3987318164863858, | |
| "grad_norm": 0.0027874386869370937, | |
| "learning_rate": 1.4452675749883438e-05, | |
| "loss": 0.03083367109298706, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.4080566952629616, | |
| "grad_norm": 0.39723604917526245, | |
| "learning_rate": 1.4400870331036628e-05, | |
| "loss": 0.03706796646118164, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 1.4173815740395375, | |
| "grad_norm": 0.006277570500969887, | |
| "learning_rate": 1.4349064912189816e-05, | |
| "loss": 0.030687217712402345, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.4267064528161133, | |
| "grad_norm": 2.196660041809082, | |
| "learning_rate": 1.4297259493343006e-05, | |
| "loss": 0.03273656129837036, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.4360313315926894, | |
| "grad_norm": 2.9575355052948, | |
| "learning_rate": 1.4245454074496194e-05, | |
| "loss": 0.025758986473083497, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.4453562103692652, | |
| "grad_norm": 0.08796069771051407, | |
| "learning_rate": 1.4193648655649382e-05, | |
| "loss": 0.03270584583282471, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.454681089145841, | |
| "grad_norm": 0.9201443791389465, | |
| "learning_rate": 1.414184323680257e-05, | |
| "loss": 0.023307127952575682, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.464005967922417, | |
| "grad_norm": 1.7311280965805054, | |
| "learning_rate": 1.409003781795576e-05, | |
| "loss": 0.03505758047103882, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 1.473330846698993, | |
| "grad_norm": 7.217854022979736, | |
| "learning_rate": 1.4038232399108948e-05, | |
| "loss": 0.032117910385131836, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.4826557254755688, | |
| "grad_norm": 0.16375161707401276, | |
| "learning_rate": 1.3986426980262136e-05, | |
| "loss": 0.024907276630401612, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.4919806042521446, | |
| "grad_norm": 2.5342984199523926, | |
| "learning_rate": 1.3934621561415324e-05, | |
| "loss": 0.02424616813659668, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.5013054830287205, | |
| "grad_norm": 0.5667886137962341, | |
| "learning_rate": 1.3882816142568514e-05, | |
| "loss": 0.035223734378814694, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 1.5106303618052965, | |
| "grad_norm": 4.529999732971191, | |
| "learning_rate": 1.3831010723721704e-05, | |
| "loss": 0.043730239868164066, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.5199552405818726, | |
| "grad_norm": 0.31222647428512573, | |
| "learning_rate": 1.3779205304874892e-05, | |
| "loss": 0.02817868709564209, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 1.5292801193584484, | |
| "grad_norm": 0.054891835898160934, | |
| "learning_rate": 1.3727399886028078e-05, | |
| "loss": 0.022635526657104492, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.5386049981350243, | |
| "grad_norm": 3.897071361541748, | |
| "learning_rate": 1.3675594467181268e-05, | |
| "loss": 0.0365865421295166, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.5479298769116001, | |
| "grad_norm": 7.58866024017334, | |
| "learning_rate": 1.3623789048334458e-05, | |
| "loss": 0.03705208778381348, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.557254755688176, | |
| "grad_norm": 0.2579911947250366, | |
| "learning_rate": 1.3571983629487646e-05, | |
| "loss": 0.036470816135406495, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 1.566579634464752, | |
| "grad_norm": 0.04304761812090874, | |
| "learning_rate": 1.3520178210640836e-05, | |
| "loss": 0.024144577980041503, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.5759045132413279, | |
| "grad_norm": 0.011871698312461376, | |
| "learning_rate": 1.3468372791794022e-05, | |
| "loss": 0.02437096118927002, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 1.585229392017904, | |
| "grad_norm": 1.8747565746307373, | |
| "learning_rate": 1.3416567372947212e-05, | |
| "loss": 0.021970641613006592, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.5945542707944798, | |
| "grad_norm": 0.0068074301816523075, | |
| "learning_rate": 1.33647619541004e-05, | |
| "loss": 0.03055704355239868, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.6038791495710556, | |
| "grad_norm": 0.7720779776573181, | |
| "learning_rate": 1.331295653525359e-05, | |
| "loss": 0.05535665988922119, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.6132040283476314, | |
| "grad_norm": 0.052967652678489685, | |
| "learning_rate": 1.3261151116406776e-05, | |
| "loss": 0.037333052158355716, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 1.6225289071242073, | |
| "grad_norm": 0.4131523370742798, | |
| "learning_rate": 1.3209345697559966e-05, | |
| "loss": 0.022513895034790038, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.6318537859007833, | |
| "grad_norm": 0.0903526097536087, | |
| "learning_rate": 1.3157540278713154e-05, | |
| "loss": 0.018794809579849244, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 1.6411786646773592, | |
| "grad_norm": 0.03226502984762192, | |
| "learning_rate": 1.3105734859866344e-05, | |
| "loss": 0.021845638751983643, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.6505035434539352, | |
| "grad_norm": 1.2775359153747559, | |
| "learning_rate": 1.3053929441019533e-05, | |
| "loss": 0.03150895118713379, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.659828422230511, | |
| "grad_norm": 0.0202046986669302, | |
| "learning_rate": 1.300212402217272e-05, | |
| "loss": 0.02440279006958008, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.669153301007087, | |
| "grad_norm": 5.867640495300293, | |
| "learning_rate": 1.2950318603325908e-05, | |
| "loss": 0.033769989013671876, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 1.6784781797836628, | |
| "grad_norm": 0.006102518644183874, | |
| "learning_rate": 1.2898513184479098e-05, | |
| "loss": 0.020930655002593994, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.6878030585602386, | |
| "grad_norm": 0.06524361670017242, | |
| "learning_rate": 1.2846707765632288e-05, | |
| "loss": 0.0303147554397583, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 1.6971279373368147, | |
| "grad_norm": 1.1646746397018433, | |
| "learning_rate": 1.2794902346785474e-05, | |
| "loss": 0.025315618515014647, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.7064528161133905, | |
| "grad_norm": 2.134981393814087, | |
| "learning_rate": 1.2743096927938664e-05, | |
| "loss": 0.04284055233001709, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.7157776948899666, | |
| "grad_norm": 2.9764657020568848, | |
| "learning_rate": 1.2691291509091852e-05, | |
| "loss": 0.02323296308517456, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.7251025736665424, | |
| "grad_norm": 0.15592370927333832, | |
| "learning_rate": 1.2639486090245042e-05, | |
| "loss": 0.03630294561386108, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 1.7344274524431182, | |
| "grad_norm": 1.1410564184188843, | |
| "learning_rate": 1.258768067139823e-05, | |
| "loss": 0.02985832929611206, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.743752331219694, | |
| "grad_norm": 1.3886200189590454, | |
| "learning_rate": 1.2535875252551418e-05, | |
| "loss": 0.019391053915023805, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 1.75307720999627, | |
| "grad_norm": 12.997761726379395, | |
| "learning_rate": 1.2484069833704606e-05, | |
| "loss": 0.02593435287475586, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.762402088772846, | |
| "grad_norm": 2.852426052093506, | |
| "learning_rate": 1.2432264414857796e-05, | |
| "loss": 0.036953463554382324, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.7717269675494218, | |
| "grad_norm": 1.0583350658416748, | |
| "learning_rate": 1.2380458996010984e-05, | |
| "loss": 0.025919597148895263, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.781051846325998, | |
| "grad_norm": 0.06280253827571869, | |
| "learning_rate": 1.2328653577164172e-05, | |
| "loss": 0.024321415424346925, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 1.7903767251025737, | |
| "grad_norm": 1.4471710920333862, | |
| "learning_rate": 1.227684815831736e-05, | |
| "loss": 0.02954728364944458, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.7997016038791496, | |
| "grad_norm": 0.3254970610141754, | |
| "learning_rate": 1.222504273947055e-05, | |
| "loss": 0.0403021764755249, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 1.8090264826557254, | |
| "grad_norm": 0.026926545426249504, | |
| "learning_rate": 1.217323732062374e-05, | |
| "loss": 0.01865153431892395, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.8183513614323012, | |
| "grad_norm": 0.037455275654792786, | |
| "learning_rate": 1.2121431901776928e-05, | |
| "loss": 0.02834453582763672, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 1.8276762402088773, | |
| "grad_norm": 1.9724242687225342, | |
| "learning_rate": 1.2069626482930114e-05, | |
| "loss": 0.02292172908782959, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.8370011189854532, | |
| "grad_norm": 2.2518837451934814, | |
| "learning_rate": 1.2017821064083304e-05, | |
| "loss": 0.029299042224884032, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 1.8463259977620292, | |
| "grad_norm": 0.8918629884719849, | |
| "learning_rate": 1.1966015645236493e-05, | |
| "loss": 0.02729450464248657, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.855650876538605, | |
| "grad_norm": 0.015352281741797924, | |
| "learning_rate": 1.1914210226389682e-05, | |
| "loss": 0.025739452838897704, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 1.864975755315181, | |
| "grad_norm": 12.12820816040039, | |
| "learning_rate": 1.1862404807542871e-05, | |
| "loss": 0.05083851337432861, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.8743006340917567, | |
| "grad_norm": 0.03783294931054115, | |
| "learning_rate": 1.1810599388696058e-05, | |
| "loss": 0.03862152814865112, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 1.8836255128683326, | |
| "grad_norm": 1.312626838684082, | |
| "learning_rate": 1.1758793969849248e-05, | |
| "loss": 0.040201754570007325, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 1.8929503916449086, | |
| "grad_norm": 0.27149704098701477, | |
| "learning_rate": 1.1706988551002436e-05, | |
| "loss": 0.020465714931488035, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 1.9022752704214845, | |
| "grad_norm": 0.051149722188711166, | |
| "learning_rate": 1.1655183132155625e-05, | |
| "loss": 0.019956029653549194, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.9116001491980605, | |
| "grad_norm": 0.012450406327843666, | |
| "learning_rate": 1.1603377713308812e-05, | |
| "loss": 0.030698204040527345, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 1.9209250279746364, | |
| "grad_norm": 0.04320710152387619, | |
| "learning_rate": 1.1551572294462002e-05, | |
| "loss": 0.01924089193344116, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 1.9302499067512122, | |
| "grad_norm": 0.028835974633693695, | |
| "learning_rate": 1.149976687561519e-05, | |
| "loss": 0.03460402250289917, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 1.939574785527788, | |
| "grad_norm": 0.032503023743629456, | |
| "learning_rate": 1.144796145676838e-05, | |
| "loss": 0.021525814533233642, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 1.948899664304364, | |
| "grad_norm": 0.054762404412031174, | |
| "learning_rate": 1.139615603792157e-05, | |
| "loss": 0.016051357984542845, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 1.95822454308094, | |
| "grad_norm": 0.41409963369369507, | |
| "learning_rate": 1.1344350619074756e-05, | |
| "loss": 0.02211181879043579, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.9675494218575158, | |
| "grad_norm": 0.6710904836654663, | |
| "learning_rate": 1.1292545200227944e-05, | |
| "loss": 0.03442399978637695, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 1.9768743006340919, | |
| "grad_norm": 0.20875470340251923, | |
| "learning_rate": 1.1240739781381133e-05, | |
| "loss": 0.029538695812225343, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 1.9861991794106677, | |
| "grad_norm": 7.152144432067871, | |
| "learning_rate": 1.1188934362534323e-05, | |
| "loss": 0.02945619821548462, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 1.9955240581872435, | |
| "grad_norm": 0.0340808629989624, | |
| "learning_rate": 1.113712894368751e-05, | |
| "loss": 0.021653232574462892, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9903632126116217, | |
| "eval_f1": 0.9054189162167566, | |
| "eval_loss": 0.04485788941383362, | |
| "eval_precision": 0.8987693529178246, | |
| "eval_recall": 0.9121676067687349, | |
| "eval_runtime": 7.0416, | |
| "eval_samples_per_second": 775.82, | |
| "eval_steps_per_second": 96.995, | |
| "step": 10724 | |
| }, | |
| { | |
| "epoch": 2.0048489369638194, | |
| "grad_norm": 0.006561782211065292, | |
| "learning_rate": 1.10853235248407e-05, | |
| "loss": 0.023352961540222168, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 2.0141738157403952, | |
| "grad_norm": 0.03905324265360832, | |
| "learning_rate": 1.1033518105993888e-05, | |
| "loss": 0.007830613255500794, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 2.023498694516971, | |
| "grad_norm": 0.002779081929475069, | |
| "learning_rate": 1.0981712687147077e-05, | |
| "loss": 0.012746865749359132, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 2.0328235732935473, | |
| "grad_norm": 3.28019642829895, | |
| "learning_rate": 1.0929907268300265e-05, | |
| "loss": 0.021561498641967772, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 2.042148452070123, | |
| "grad_norm": 0.11420201510190964, | |
| "learning_rate": 1.0878101849453453e-05, | |
| "loss": 0.01144665241241455, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 2.051473330846699, | |
| "grad_norm": 0.01909773238003254, | |
| "learning_rate": 1.0826296430606642e-05, | |
| "loss": 0.006808329224586487, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.060798209623275, | |
| "grad_norm": 0.03136987239122391, | |
| "learning_rate": 1.0774491011759831e-05, | |
| "loss": 0.015448588132858276, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 2.0701230883998507, | |
| "grad_norm": 0.0069969939067959785, | |
| "learning_rate": 1.072268559291302e-05, | |
| "loss": 0.020947656631469726, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 2.0794479671764265, | |
| "grad_norm": 0.008591468445956707, | |
| "learning_rate": 1.0670880174066208e-05, | |
| "loss": 0.013551335334777832, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 2.0887728459530024, | |
| "grad_norm": 0.007207474671304226, | |
| "learning_rate": 1.0619074755219396e-05, | |
| "loss": 0.010735607147216797, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 2.0980977247295787, | |
| "grad_norm": 0.007553383708000183, | |
| "learning_rate": 1.0567269336372585e-05, | |
| "loss": 0.0180646276473999, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 2.1074226035061545, | |
| "grad_norm": 0.0854165256023407, | |
| "learning_rate": 1.0515463917525775e-05, | |
| "loss": 0.012175880670547486, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 2.1167474822827304, | |
| "grad_norm": 1.2997490167617798, | |
| "learning_rate": 1.0463658498678963e-05, | |
| "loss": 0.028563385009765626, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 2.126072361059306, | |
| "grad_norm": 0.028747934848070145, | |
| "learning_rate": 1.041185307983215e-05, | |
| "loss": 0.008224156498908997, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 2.135397239835882, | |
| "grad_norm": 0.02653522975742817, | |
| "learning_rate": 1.036004766098534e-05, | |
| "loss": 0.014218298196792602, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 2.144722118612458, | |
| "grad_norm": 0.0075917416252195835, | |
| "learning_rate": 1.030824224213853e-05, | |
| "loss": 0.010074301958084106, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.1540469973890337, | |
| "grad_norm": 0.01568465493619442, | |
| "learning_rate": 1.0256436823291717e-05, | |
| "loss": 0.009785271286964416, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 2.16337187616561, | |
| "grad_norm": 2.6329779624938965, | |
| "learning_rate": 1.0204631404444907e-05, | |
| "loss": 0.039693479537963865, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 2.172696754942186, | |
| "grad_norm": 0.25744888186454773, | |
| "learning_rate": 1.0152825985598094e-05, | |
| "loss": 0.009682031273841858, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 2.1820216337187617, | |
| "grad_norm": 0.048078108578920364, | |
| "learning_rate": 1.0101020566751283e-05, | |
| "loss": 0.012061976194381714, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 2.1913465124953375, | |
| "grad_norm": 0.009185828268527985, | |
| "learning_rate": 1.0049215147904471e-05, | |
| "loss": 0.01473943829536438, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 2.2006713912719134, | |
| "grad_norm": 0.3359212279319763, | |
| "learning_rate": 9.99740972905766e-06, | |
| "loss": 0.022451975345611573, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 2.209996270048489, | |
| "grad_norm": 0.03128429129719734, | |
| "learning_rate": 9.94560431021085e-06, | |
| "loss": 0.015020393133163452, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 2.2193211488250655, | |
| "grad_norm": 0.01077917031943798, | |
| "learning_rate": 9.893798891364037e-06, | |
| "loss": 0.007385715842247009, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 2.2286460276016413, | |
| "grad_norm": 0.0009410646744072437, | |
| "learning_rate": 9.841993472517225e-06, | |
| "loss": 0.010898010730743408, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 2.237970906378217, | |
| "grad_norm": 0.23428411781787872, | |
| "learning_rate": 9.790188053670415e-06, | |
| "loss": 0.017517651319503783, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.247295785154793, | |
| "grad_norm": 5.2552947998046875, | |
| "learning_rate": 9.738382634823603e-06, | |
| "loss": 0.011954027414321899, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 2.256620663931369, | |
| "grad_norm": 0.1022522896528244, | |
| "learning_rate": 9.686577215976793e-06, | |
| "loss": 0.0103814697265625, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 2.2659455427079447, | |
| "grad_norm": 0.01425126288086176, | |
| "learning_rate": 9.634771797129981e-06, | |
| "loss": 0.016681231260299682, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 2.2752704214845205, | |
| "grad_norm": 0.010022806003689766, | |
| "learning_rate": 9.58296637828317e-06, | |
| "loss": 0.007602689266204834, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 2.2845953002610964, | |
| "grad_norm": 0.09281191229820251, | |
| "learning_rate": 9.531160959436357e-06, | |
| "loss": 0.015772578716278077, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 2.2939201790376726, | |
| "grad_norm": 1.6627157926559448, | |
| "learning_rate": 9.479355540589547e-06, | |
| "loss": 0.0149391770362854, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 2.3032450578142485, | |
| "grad_norm": 0.039720647037029266, | |
| "learning_rate": 9.427550121742735e-06, | |
| "loss": 0.004919275641441345, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 2.3125699365908243, | |
| "grad_norm": 0.13361865282058716, | |
| "learning_rate": 9.375744702895923e-06, | |
| "loss": 0.0066268140077590946, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 2.3218948153674, | |
| "grad_norm": 0.004165187943726778, | |
| "learning_rate": 9.323939284049113e-06, | |
| "loss": 0.008859132528305053, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 2.331219694143976, | |
| "grad_norm": 0.01734941452741623, | |
| "learning_rate": 9.272133865202301e-06, | |
| "loss": 0.01958281397819519, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.340544572920552, | |
| "grad_norm": 1.4992754459381104, | |
| "learning_rate": 9.22032844635549e-06, | |
| "loss": 0.024173910617828368, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 2.349869451697128, | |
| "grad_norm": 3.266171455383301, | |
| "learning_rate": 9.168523027508677e-06, | |
| "loss": 0.026157324314117433, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 2.359194330473704, | |
| "grad_norm": 0.034271348267793655, | |
| "learning_rate": 9.116717608661867e-06, | |
| "loss": 0.004791333377361298, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 2.36851920925028, | |
| "grad_norm": 0.020556321367621422, | |
| "learning_rate": 9.064912189815055e-06, | |
| "loss": 0.023116433620452882, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 2.3778440880268557, | |
| "grad_norm": 2.9007959365844727, | |
| "learning_rate": 9.013106770968243e-06, | |
| "loss": 0.003782390058040619, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 2.3871689668034315, | |
| "grad_norm": 0.04751985892653465, | |
| "learning_rate": 8.961301352121433e-06, | |
| "loss": 0.004796516001224518, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 2.3964938455800073, | |
| "grad_norm": 0.09174877405166626, | |
| "learning_rate": 8.909495933274621e-06, | |
| "loss": 0.01988631725311279, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 2.405818724356583, | |
| "grad_norm": 0.897373378276825, | |
| "learning_rate": 8.857690514427811e-06, | |
| "loss": 0.011833161115646362, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 2.4151436031331595, | |
| "grad_norm": 0.026099465787410736, | |
| "learning_rate": 8.805885095580999e-06, | |
| "loss": 0.021891412734985353, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 2.4244684819097353, | |
| "grad_norm": 0.005264167208224535, | |
| "learning_rate": 8.754079676734187e-06, | |
| "loss": 0.014649747610092164, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.433793360686311, | |
| "grad_norm": 0.0665712058544159, | |
| "learning_rate": 8.702274257887375e-06, | |
| "loss": 0.018104093074798586, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 2.443118239462887, | |
| "grad_norm": 0.01004517637193203, | |
| "learning_rate": 8.650468839040565e-06, | |
| "loss": 0.004754712581634521, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 2.452443118239463, | |
| "grad_norm": 0.011136854998767376, | |
| "learning_rate": 8.598663420193753e-06, | |
| "loss": 0.008313758969306946, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 2.4617679970160387, | |
| "grad_norm": 0.0015451794024556875, | |
| "learning_rate": 8.546858001346941e-06, | |
| "loss": 0.008117977380752563, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 2.471092875792615, | |
| "grad_norm": 1.5158227682113647, | |
| "learning_rate": 8.495052582500131e-06, | |
| "loss": 0.02230316638946533, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 2.480417754569191, | |
| "grad_norm": 0.015987800434231758, | |
| "learning_rate": 8.443247163653319e-06, | |
| "loss": 0.0033962687849998473, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 2.4897426333457666, | |
| "grad_norm": 0.2436022162437439, | |
| "learning_rate": 8.391441744806507e-06, | |
| "loss": 0.009453248977661134, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 2.4990675121223425, | |
| "grad_norm": 0.007971422746777534, | |
| "learning_rate": 8.339636325959695e-06, | |
| "loss": 0.005669102668762207, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 2.5083923908989183, | |
| "grad_norm": 0.030247289687395096, | |
| "learning_rate": 8.287830907112885e-06, | |
| "loss": 0.007165596485137939, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 2.517717269675494, | |
| "grad_norm": 0.03285367041826248, | |
| "learning_rate": 8.236025488266073e-06, | |
| "loss": 0.01023703694343567, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.52704214845207, | |
| "grad_norm": 1.4136919975280762, | |
| "learning_rate": 8.184220069419261e-06, | |
| "loss": 0.019952696561813355, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 2.5363670272286463, | |
| "grad_norm": 0.13177263736724854, | |
| "learning_rate": 8.132414650572451e-06, | |
| "loss": 0.008267701864242553, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 2.5456919060052217, | |
| "grad_norm": 13.017802238464355, | |
| "learning_rate": 8.080609231725639e-06, | |
| "loss": 0.013502672910690308, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 2.555016784781798, | |
| "grad_norm": 20.80805015563965, | |
| "learning_rate": 8.028803812878829e-06, | |
| "loss": 0.014624173641204835, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 2.564341663558374, | |
| "grad_norm": 0.05195024982094765, | |
| "learning_rate": 7.976998394032017e-06, | |
| "loss": 0.025228326320648194, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 2.5736665423349496, | |
| "grad_norm": 0.004629973322153091, | |
| "learning_rate": 7.925192975185205e-06, | |
| "loss": 0.02166285514831543, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 2.5829914211115255, | |
| "grad_norm": 0.0022503056097775698, | |
| "learning_rate": 7.873387556338393e-06, | |
| "loss": 0.02188849925994873, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 2.5923162998881013, | |
| "grad_norm": 0.8524413108825684, | |
| "learning_rate": 7.821582137491583e-06, | |
| "loss": 0.007161260843276978, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 2.6016411786646776, | |
| "grad_norm": 2.9589359760284424, | |
| "learning_rate": 7.769776718644771e-06, | |
| "loss": 0.009217590093612671, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 2.6109660574412534, | |
| "grad_norm": 0.0014888152945786715, | |
| "learning_rate": 7.717971299797959e-06, | |
| "loss": 0.007640480399131775, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.6202909362178293, | |
| "grad_norm": 0.0024451257195323706, | |
| "learning_rate": 7.666165880951149e-06, | |
| "loss": 0.009097555875778198, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 2.629615814994405, | |
| "grad_norm": 1.4727226495742798, | |
| "learning_rate": 7.614360462104337e-06, | |
| "loss": 0.022815148830413818, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 2.638940693770981, | |
| "grad_norm": 0.14492234587669373, | |
| "learning_rate": 7.562555043257526e-06, | |
| "loss": 0.00907568097114563, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 2.648265572547557, | |
| "grad_norm": 0.006422064267098904, | |
| "learning_rate": 7.510749624410714e-06, | |
| "loss": 0.012345269918441773, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 2.6575904513241326, | |
| "grad_norm": 0.003297192510217428, | |
| "learning_rate": 7.458944205563903e-06, | |
| "loss": 0.013943998813629151, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 2.666915330100709, | |
| "grad_norm": 0.009486474096775055, | |
| "learning_rate": 7.407138786717091e-06, | |
| "loss": 0.007204347848892212, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 2.6762402088772848, | |
| "grad_norm": 0.001906346995383501, | |
| "learning_rate": 7.35533336787028e-06, | |
| "loss": 0.0064238041639328005, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 2.6855650876538606, | |
| "grad_norm": 0.009013752453029156, | |
| "learning_rate": 7.303527949023469e-06, | |
| "loss": 0.019118592739105225, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 2.6948899664304364, | |
| "grad_norm": 9.203516006469727, | |
| "learning_rate": 7.251722530176657e-06, | |
| "loss": 0.014843382835388184, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 2.7042148452070123, | |
| "grad_norm": 0.013872411102056503, | |
| "learning_rate": 7.199917111329846e-06, | |
| "loss": 0.017863935232162474, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 2.713539723983588, | |
| "grad_norm": 0.008059196174144745, | |
| "learning_rate": 7.148111692483034e-06, | |
| "loss": 0.007313421964645386, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 2.722864602760164, | |
| "grad_norm": 0.007967078126966953, | |
| "learning_rate": 7.096306273636223e-06, | |
| "loss": 0.005426759123802185, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 2.7321894815367402, | |
| "grad_norm": 0.052042555063962936, | |
| "learning_rate": 7.044500854789411e-06, | |
| "loss": 0.010500948429107666, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 2.741514360313316, | |
| "grad_norm": 0.008007310330867767, | |
| "learning_rate": 6.9926954359426e-06, | |
| "loss": 0.011817890405654907, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 2.750839239089892, | |
| "grad_norm": 0.553403377532959, | |
| "learning_rate": 6.940890017095788e-06, | |
| "loss": 0.011694425344467163, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 2.7601641178664678, | |
| "grad_norm": 0.011203479021787643, | |
| "learning_rate": 6.889084598248977e-06, | |
| "loss": 0.009664978981018067, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 2.7694889966430436, | |
| "grad_norm": 0.031599052250385284, | |
| "learning_rate": 6.837279179402167e-06, | |
| "loss": 0.0062562096118927, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 2.7788138754196194, | |
| "grad_norm": 0.07515502721071243, | |
| "learning_rate": 6.785473760555355e-06, | |
| "loss": 0.008186891674995422, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 2.7881387541961953, | |
| "grad_norm": 0.004041098989546299, | |
| "learning_rate": 6.733668341708544e-06, | |
| "loss": 0.008758670091629029, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 2.7974636329727716, | |
| "grad_norm": 0.010477816686034203, | |
| "learning_rate": 6.681862922861732e-06, | |
| "loss": 0.008421186804771424, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.8067885117493474, | |
| "grad_norm": 0.037119459360837936, | |
| "learning_rate": 6.630057504014921e-06, | |
| "loss": 0.0044859576225280764, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 2.8161133905259232, | |
| "grad_norm": 2.2909059524536133, | |
| "learning_rate": 6.578252085168109e-06, | |
| "loss": 0.007467656135559082, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 2.825438269302499, | |
| "grad_norm": 0.028654785826802254, | |
| "learning_rate": 6.526446666321298e-06, | |
| "loss": 0.011730804443359374, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 2.834763148079075, | |
| "grad_norm": 0.00396377919241786, | |
| "learning_rate": 6.474641247474487e-06, | |
| "loss": 0.007885778546333313, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 2.8440880268556508, | |
| "grad_norm": 4.526209354400635, | |
| "learning_rate": 6.422835828627675e-06, | |
| "loss": 0.013013125658035278, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 2.8534129056322266, | |
| "grad_norm": 0.006890705320984125, | |
| "learning_rate": 6.371030409780864e-06, | |
| "loss": 0.020241425037384034, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 2.862737784408803, | |
| "grad_norm": 0.04351874813437462, | |
| "learning_rate": 6.319224990934052e-06, | |
| "loss": 0.02235487461090088, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 2.8720626631853787, | |
| "grad_norm": 0.004027783405035734, | |
| "learning_rate": 6.267419572087241e-06, | |
| "loss": 0.012025052309036255, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 2.8813875419619546, | |
| "grad_norm": 0.017081253230571747, | |
| "learning_rate": 6.215614153240429e-06, | |
| "loss": 0.012213168144226074, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 2.8907124207385304, | |
| "grad_norm": 5.580208778381348, | |
| "learning_rate": 6.163808734393618e-06, | |
| "loss": 0.011020108461380004, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 2.9000372995151062, | |
| "grad_norm": 0.026449229568243027, | |
| "learning_rate": 6.112003315546806e-06, | |
| "loss": 0.020866034030914308, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 2.909362178291682, | |
| "grad_norm": 0.018465599045157433, | |
| "learning_rate": 6.060197896699996e-06, | |
| "loss": 0.006286224126815796, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 2.918687057068258, | |
| "grad_norm": 0.004978302400559187, | |
| "learning_rate": 6.008392477853185e-06, | |
| "loss": 0.01425373911857605, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 2.928011935844834, | |
| "grad_norm": 0.008023403584957123, | |
| "learning_rate": 5.956587059006373e-06, | |
| "loss": 0.008588857650756836, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 2.93733681462141, | |
| "grad_norm": 0.014545072801411152, | |
| "learning_rate": 5.904781640159562e-06, | |
| "loss": 0.009176114797592163, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 2.946661693397986, | |
| "grad_norm": 0.0036765779368579388, | |
| "learning_rate": 5.85297622131275e-06, | |
| "loss": 0.019455695152282716, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 2.9559865721745617, | |
| "grad_norm": 8.891608238220215, | |
| "learning_rate": 5.801170802465939e-06, | |
| "loss": 0.012102892398834228, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 2.9653114509511376, | |
| "grad_norm": 0.05219835415482521, | |
| "learning_rate": 5.749365383619127e-06, | |
| "loss": 0.009157007336616516, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 2.9746363297277134, | |
| "grad_norm": 0.003453275188803673, | |
| "learning_rate": 5.697559964772316e-06, | |
| "loss": 0.009056896567344666, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 2.9839612085042893, | |
| "grad_norm": 0.004969852045178413, | |
| "learning_rate": 5.645754545925505e-06, | |
| "loss": 0.012932000160217285, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.9932860872808655, | |
| "grad_norm": 2.0237090587615967, | |
| "learning_rate": 5.593949127078693e-06, | |
| "loss": 0.018866615295410158, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9921177364024406, | |
| "eval_f1": 0.9189297124600639, | |
| "eval_loss": 0.04323037713766098, | |
| "eval_precision": 0.9109263657957245, | |
| "eval_recall": 0.9270749395648671, | |
| "eval_runtime": 7.1026, | |
| "eval_samples_per_second": 769.16, | |
| "eval_steps_per_second": 96.163, | |
| "step": 16086 | |
| }, | |
| { | |
| "epoch": 3.0026109660574414, | |
| "grad_norm": 0.006980204954743385, | |
| "learning_rate": 5.542143708231882e-06, | |
| "loss": 0.012530730962753296, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 3.011935844834017, | |
| "grad_norm": 0.008406821638345718, | |
| "learning_rate": 5.49033828938507e-06, | |
| "loss": 0.0016689696907997132, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 3.021260723610593, | |
| "grad_norm": 0.26228681206703186, | |
| "learning_rate": 5.438532870538259e-06, | |
| "loss": 0.007807348966598511, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 3.030585602387169, | |
| "grad_norm": 0.47371771931648254, | |
| "learning_rate": 5.386727451691447e-06, | |
| "loss": 0.006382474303245544, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 3.0399104811637447, | |
| "grad_norm": 0.0065447427332401276, | |
| "learning_rate": 5.334922032844636e-06, | |
| "loss": 0.004743200242519379, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 3.0492353599403206, | |
| "grad_norm": 0.008346166461706161, | |
| "learning_rate": 5.283116613997824e-06, | |
| "loss": 0.0055571597814559935, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 3.058560238716897, | |
| "grad_norm": 5.690232276916504, | |
| "learning_rate": 5.2313111951510135e-06, | |
| "loss": 0.00496139645576477, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 3.0678851174934727, | |
| "grad_norm": 0.3915584981441498, | |
| "learning_rate": 5.1795057763042025e-06, | |
| "loss": 0.0035722294449806215, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 3.0772099962700485, | |
| "grad_norm": 0.002272524405270815, | |
| "learning_rate": 5.1277003574573906e-06, | |
| "loss": 0.011130574941635132, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 3.0865348750466244, | |
| "grad_norm": 2.395972967147827, | |
| "learning_rate": 5.0758949386105795e-06, | |
| "loss": 0.003059500753879547, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 3.0958597538232002, | |
| "grad_norm": 0.004218028858304024, | |
| "learning_rate": 5.024089519763768e-06, | |
| "loss": 0.0016028760373592377, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 3.105184632599776, | |
| "grad_norm": 15.134767532348633, | |
| "learning_rate": 4.9722841009169565e-06, | |
| "loss": 0.008084517717361451, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 3.114509511376352, | |
| "grad_norm": 0.0018907383782789111, | |
| "learning_rate": 4.920478682070145e-06, | |
| "loss": 0.003314727246761322, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 3.123834390152928, | |
| "grad_norm": 0.0029481553938239813, | |
| "learning_rate": 4.8686732632233335e-06, | |
| "loss": 0.0049530166387557984, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 3.133159268929504, | |
| "grad_norm": 0.16056513786315918, | |
| "learning_rate": 4.8168678443765225e-06, | |
| "loss": 0.004786551296710968, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 3.14248414770608, | |
| "grad_norm": 0.2876565158367157, | |
| "learning_rate": 4.7650624255297106e-06, | |
| "loss": 0.0019748318195343018, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 3.1518090264826557, | |
| "grad_norm": 0.0028331561479717493, | |
| "learning_rate": 4.7132570066828995e-06, | |
| "loss": 0.002513662874698639, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 3.1611339052592315, | |
| "grad_norm": 0.006282527931034565, | |
| "learning_rate": 4.6614515878360884e-06, | |
| "loss": 0.01659904956817627, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 3.1704587840358074, | |
| "grad_norm": 0.007699803449213505, | |
| "learning_rate": 4.6096461689892765e-06, | |
| "loss": 0.00718508780002594, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 3.1797836628123832, | |
| "grad_norm": 0.00149145582690835, | |
| "learning_rate": 4.5578407501424655e-06, | |
| "loss": 0.001901312619447708, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 3.1891085415889595, | |
| "grad_norm": 0.019138796254992485, | |
| "learning_rate": 4.5060353312956535e-06, | |
| "loss": 0.01903280019760132, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 3.1984334203655354, | |
| "grad_norm": 0.544906497001648, | |
| "learning_rate": 4.4542299124488425e-06, | |
| "loss": 0.004833935499191284, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 3.207758299142111, | |
| "grad_norm": 0.009669867344200611, | |
| "learning_rate": 4.402424493602031e-06, | |
| "loss": 0.00512764036655426, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 3.217083177918687, | |
| "grad_norm": 0.01658560521900654, | |
| "learning_rate": 4.3506190747552195e-06, | |
| "loss": 0.0008115243166685104, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 3.226408056695263, | |
| "grad_norm": 0.024577626958489418, | |
| "learning_rate": 4.2988136559084084e-06, | |
| "loss": 0.0021865896880626677, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 3.2357329354718387, | |
| "grad_norm": 32.374088287353516, | |
| "learning_rate": 4.247008237061597e-06, | |
| "loss": 0.004616082906723023, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 3.2450578142484146, | |
| "grad_norm": 0.06743080914020538, | |
| "learning_rate": 4.1952028182147855e-06, | |
| "loss": 0.0010297740995883942, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 3.254382693024991, | |
| "grad_norm": 0.0024560948368161917, | |
| "learning_rate": 4.143397399367974e-06, | |
| "loss": 0.01637653708457947, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 3.2637075718015667, | |
| "grad_norm": 0.00366505840793252, | |
| "learning_rate": 4.0915919805211625e-06, | |
| "loss": 0.0007262816280126571, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 3.2730324505781425, | |
| "grad_norm": 0.02281450480222702, | |
| "learning_rate": 4.039786561674351e-06, | |
| "loss": 0.0034805700182914735, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 3.2823573293547184, | |
| "grad_norm": 0.0021532338578253984, | |
| "learning_rate": 3.98798114282754e-06, | |
| "loss": 0.005815493464469909, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 3.291682208131294, | |
| "grad_norm": 0.025134483352303505, | |
| "learning_rate": 3.9361757239807284e-06, | |
| "loss": 0.0013174866139888763, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 3.30100708690787, | |
| "grad_norm": 0.011261076666414738, | |
| "learning_rate": 3.884370305133917e-06, | |
| "loss": 0.002750571370124817, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 3.310331965684446, | |
| "grad_norm": 0.005523109342902899, | |
| "learning_rate": 3.832564886287106e-06, | |
| "loss": 0.006260217428207398, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 3.319656844461022, | |
| "grad_norm": 0.0017233422258868814, | |
| "learning_rate": 3.780759467440295e-06, | |
| "loss": 0.002834466993808746, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 3.328981723237598, | |
| "grad_norm": 0.3033665120601654, | |
| "learning_rate": 3.7289540485934833e-06, | |
| "loss": 0.0014140091836452485, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 3.338306602014174, | |
| "grad_norm": 0.007635418325662613, | |
| "learning_rate": 3.677148629746672e-06, | |
| "loss": 0.009239104390144349, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 3.3476314807907497, | |
| "grad_norm": 0.008077415637671947, | |
| "learning_rate": 3.6253432108998604e-06, | |
| "loss": 0.0028353652358055117, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 3.3569563595673255, | |
| "grad_norm": 0.0055144126527011395, | |
| "learning_rate": 3.573537792053049e-06, | |
| "loss": 0.01257444977760315, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 3.3662812383439014, | |
| "grad_norm": 0.10481590777635574, | |
| "learning_rate": 3.5217323732062374e-06, | |
| "loss": 0.008266312479972839, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 3.375606117120477, | |
| "grad_norm": 0.004028915427625179, | |
| "learning_rate": 3.469926954359426e-06, | |
| "loss": 0.00322272926568985, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 3.3849309958970535, | |
| "grad_norm": 0.007838011719286442, | |
| "learning_rate": 3.4181215355126153e-06, | |
| "loss": 0.0017492137849330902, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 3.3942558746736293, | |
| "grad_norm": 0.008134761825203896, | |
| "learning_rate": 3.3663161166658038e-06, | |
| "loss": 0.004277588129043579, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 3.403580753450205, | |
| "grad_norm": 1.130017638206482, | |
| "learning_rate": 3.3145106978189923e-06, | |
| "loss": 0.003342975378036499, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 3.412905632226781, | |
| "grad_norm": 0.0033679301850497723, | |
| "learning_rate": 3.262705278972181e-06, | |
| "loss": 0.005864649415016175, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 3.422230511003357, | |
| "grad_norm": 1.1952660083770752, | |
| "learning_rate": 3.2108998601253693e-06, | |
| "loss": 0.009021402597427368, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 3.4315553897799327, | |
| "grad_norm": 0.4340899884700775, | |
| "learning_rate": 3.159094441278558e-06, | |
| "loss": 0.0038458964228630065, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 3.4408802685565085, | |
| "grad_norm": 0.007966181263327599, | |
| "learning_rate": 3.1072890224317463e-06, | |
| "loss": 0.004797542989253997, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 3.450205147333085, | |
| "grad_norm": 0.0008151158690452576, | |
| "learning_rate": 3.055483603584935e-06, | |
| "loss": 0.008470645546913147, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 3.4595300261096606, | |
| "grad_norm": 0.0033519044518470764, | |
| "learning_rate": 3.003678184738124e-06, | |
| "loss": 0.004539164900779724, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 3.4688549048862365, | |
| "grad_norm": 0.043223973363637924, | |
| "learning_rate": 2.9518727658913127e-06, | |
| "loss": 0.0028054285049438476, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 3.4781797836628123, | |
| "grad_norm": 0.011569101363420486, | |
| "learning_rate": 2.9000673470445012e-06, | |
| "loss": 0.007232290506362915, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 3.487504662439388, | |
| "grad_norm": 0.007914524525403976, | |
| "learning_rate": 2.8482619281976897e-06, | |
| "loss": 0.0002942212298512459, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 3.496829541215964, | |
| "grad_norm": 0.0006849826313555241, | |
| "learning_rate": 2.7964565093508782e-06, | |
| "loss": 0.00410827487707138, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 3.50615441999254, | |
| "grad_norm": 0.0026959700044244528, | |
| "learning_rate": 2.7446510905040668e-06, | |
| "loss": 0.0011774758994579316, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 3.515479298769116, | |
| "grad_norm": 0.0012961579486727715, | |
| "learning_rate": 2.6928456716572553e-06, | |
| "loss": 0.0034612080454826354, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 3.524804177545692, | |
| "grad_norm": 0.06650816649198532, | |
| "learning_rate": 2.6410402528104438e-06, | |
| "loss": 0.013415820598602295, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 3.534129056322268, | |
| "grad_norm": 0.0007635413203388453, | |
| "learning_rate": 2.589234833963633e-06, | |
| "loss": 0.0015386410057544708, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 3.5434539350988437, | |
| "grad_norm": 0.001662875059992075, | |
| "learning_rate": 2.5374294151168216e-06, | |
| "loss": 0.002086118161678314, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 3.5527788138754195, | |
| "grad_norm": 0.0031695894431322813, | |
| "learning_rate": 2.48562399627001e-06, | |
| "loss": 0.0013567799329757691, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 3.562103692651996, | |
| "grad_norm": 0.019759224727749825, | |
| "learning_rate": 2.4338185774231987e-06, | |
| "loss": 0.008338750004768372, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 3.571428571428571, | |
| "grad_norm": 0.01757100783288479, | |
| "learning_rate": 2.382013158576387e-06, | |
| "loss": 0.0016921743750572204, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 3.5807534502051475, | |
| "grad_norm": 2.4479777812957764, | |
| "learning_rate": 2.3302077397295757e-06, | |
| "loss": 0.011807719469070435, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 3.5900783289817233, | |
| "grad_norm": 12.382244110107422, | |
| "learning_rate": 2.2784023208827646e-06, | |
| "loss": 0.005563015937805176, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 3.599403207758299, | |
| "grad_norm": 0.012547838501632214, | |
| "learning_rate": 2.226596902035953e-06, | |
| "loss": 0.00420228123664856, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 3.608728086534875, | |
| "grad_norm": 0.009670069441199303, | |
| "learning_rate": 2.1747914831891417e-06, | |
| "loss": 0.005122922658920288, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 3.618052965311451, | |
| "grad_norm": 0.3705468773841858, | |
| "learning_rate": 2.12298606434233e-06, | |
| "loss": 0.0016570650041103363, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 3.627377844088027, | |
| "grad_norm": 0.03667959198355675, | |
| "learning_rate": 2.071180645495519e-06, | |
| "loss": 0.008318853378295899, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 3.6367027228646025, | |
| "grad_norm": 0.026855269446969032, | |
| "learning_rate": 2.0193752266487076e-06, | |
| "loss": 0.011855947971343993, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 3.646027601641179, | |
| "grad_norm": 0.004127690568566322, | |
| "learning_rate": 1.967569807801896e-06, | |
| "loss": 0.0037176933884620665, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 3.6553524804177546, | |
| "grad_norm": 0.033966220915317535, | |
| "learning_rate": 1.9157643889550846e-06, | |
| "loss": 0.0034821495413780213, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 3.6646773591943305, | |
| "grad_norm": 0.008595237508416176, | |
| "learning_rate": 1.8639589701082736e-06, | |
| "loss": 0.003079477548599243, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 3.6740022379709063, | |
| "grad_norm": 0.18409447371959686, | |
| "learning_rate": 1.812153551261462e-06, | |
| "loss": 0.0028409546613693236, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 3.683327116747482, | |
| "grad_norm": 0.009265055879950523, | |
| "learning_rate": 1.7603481324146506e-06, | |
| "loss": 0.001990189254283905, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 3.6926519955240584, | |
| "grad_norm": 0.004075230099260807, | |
| "learning_rate": 1.7085427135678393e-06, | |
| "loss": 0.009914104342460633, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 3.701976874300634, | |
| "grad_norm": 0.012730306945741177, | |
| "learning_rate": 1.656737294721028e-06, | |
| "loss": 0.00510865867137909, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 3.71130175307721, | |
| "grad_norm": 0.9975103735923767, | |
| "learning_rate": 1.6049318758742165e-06, | |
| "loss": 0.00289763867855072, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 3.720626631853786, | |
| "grad_norm": 0.14549227058887482, | |
| "learning_rate": 1.553126457027405e-06, | |
| "loss": 0.014574718475341798, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 3.729951510630362, | |
| "grad_norm": 0.002959158504381776, | |
| "learning_rate": 1.5013210381805938e-06, | |
| "loss": 0.0035466670989990233, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 3.7392763894069376, | |
| "grad_norm": 0.044310204684734344, | |
| "learning_rate": 1.4495156193337825e-06, | |
| "loss": 0.005526635646820068, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 3.7486012681835135, | |
| "grad_norm": 0.06063301861286163, | |
| "learning_rate": 1.397710200486971e-06, | |
| "loss": 0.01945833921432495, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 3.7579261469600898, | |
| "grad_norm": 1.9790464639663696, | |
| "learning_rate": 1.3459047816401597e-06, | |
| "loss": 0.009157074689865112, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 3.767251025736665, | |
| "grad_norm": 0.002332707168534398, | |
| "learning_rate": 1.2940993627933483e-06, | |
| "loss": 0.003599865138530731, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 3.7765759045132414, | |
| "grad_norm": 0.007876844145357609, | |
| "learning_rate": 1.242293943946537e-06, | |
| "loss": 0.002118881195783615, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 3.7859007832898173, | |
| "grad_norm": 0.06033371388912201, | |
| "learning_rate": 1.1904885250997255e-06, | |
| "loss": 0.013430379629135132, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 3.795225662066393, | |
| "grad_norm": 0.007944832555949688, | |
| "learning_rate": 1.1386831062529142e-06, | |
| "loss": 0.011043739318847657, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 3.804550540842969, | |
| "grad_norm": 0.009239411912858486, | |
| "learning_rate": 1.0868776874061027e-06, | |
| "loss": 0.007748922109603882, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 3.813875419619545, | |
| "grad_norm": 0.0014006602577865124, | |
| "learning_rate": 1.0350722685592914e-06, | |
| "loss": 0.006385021805763245, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 3.823200298396121, | |
| "grad_norm": 1.487459421157837, | |
| "learning_rate": 9.8326684971248e-07, | |
| "loss": 0.009500337243080139, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 3.832525177172697, | |
| "grad_norm": 0.029605276882648468, | |
| "learning_rate": 9.314614308656686e-07, | |
| "loss": 0.003571970164775848, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 3.8418500559492728, | |
| "grad_norm": 0.0038495927583426237, | |
| "learning_rate": 8.796560120188573e-07, | |
| "loss": 0.006721885204315186, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 3.8511749347258486, | |
| "grad_norm": 0.015677325427532196, | |
| "learning_rate": 8.278505931720458e-07, | |
| "loss": 0.0021590781211853027, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 3.8604998135024244, | |
| "grad_norm": 0.00773986428976059, | |
| "learning_rate": 7.760451743252345e-07, | |
| "loss": 0.0072018647193908695, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 3.8698246922790003, | |
| "grad_norm": 0.006910277064889669, | |
| "learning_rate": 7.24239755478423e-07, | |
| "loss": 0.0017287896573543549, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 3.879149571055576, | |
| "grad_norm": 0.16674445569515228, | |
| "learning_rate": 6.724343366316118e-07, | |
| "loss": 0.003435662090778351, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 3.8884744498321524, | |
| "grad_norm": 0.11410090327262878, | |
| "learning_rate": 6.206289177848004e-07, | |
| "loss": 0.0015013472735881806, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 3.8977993286087282, | |
| "grad_norm": 0.1628509759902954, | |
| "learning_rate": 5.68823498937989e-07, | |
| "loss": 0.0035611391067504883, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 3.907124207385304, | |
| "grad_norm": 0.22206370532512665, | |
| "learning_rate": 5.170180800911776e-07, | |
| "loss": 0.005973511338233948, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 3.91644908616188, | |
| "grad_norm": 0.285854309797287, | |
| "learning_rate": 4.6521266124436624e-07, | |
| "loss": 0.002481496632099152, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 3.9257739649384558, | |
| "grad_norm": 0.0018572107655927539, | |
| "learning_rate": 4.1340724239755486e-07, | |
| "loss": 0.0034265148639678954, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 3.9350988437150316, | |
| "grad_norm": 0.00111959979403764, | |
| "learning_rate": 3.6160182355074347e-07, | |
| "loss": 0.003972585201263428, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 3.9444237224916074, | |
| "grad_norm": 0.006579425185918808, | |
| "learning_rate": 3.0979640470393204e-07, | |
| "loss": 0.007484051585197449, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 3.9537486012681837, | |
| "grad_norm": 0.0016149668954312801, | |
| "learning_rate": 2.5799098585712066e-07, | |
| "loss": 0.005609593391418457, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 3.9630734800447596, | |
| "grad_norm": 8.504377365112305, | |
| "learning_rate": 2.061855670103093e-07, | |
| "loss": 0.00788326621055603, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 3.9723983588213354, | |
| "grad_norm": 0.29231831431388855, | |
| "learning_rate": 1.5438014816349792e-07, | |
| "loss": 0.0017250549793243408, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 3.9817232375979112, | |
| "grad_norm": 0.006406121421605349, | |
| "learning_rate": 1.0257472931668653e-07, | |
| "loss": 0.0028238424658775328, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 3.991048116374487, | |
| "grad_norm": 0.005788094364106655, | |
| "learning_rate": 5.076931046987516e-08, | |
| "loss": 0.0027461829781532288, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9926021944640846, | |
| "eval_f1": 0.9201277955271566, | |
| "eval_loss": 0.045499056577682495, | |
| "eval_precision": 0.9121140142517815, | |
| "eval_recall": 0.9282836422240129, | |
| "eval_runtime": 7.0731, | |
| "eval_samples_per_second": 772.358, | |
| "eval_steps_per_second": 96.562, | |
| "step": 21448 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 21448, | |
| "total_flos": 2300195917669620.0, | |
| "train_loss": 0.0979897177003356, | |
| "train_runtime": 1420.7755, | |
| "train_samples_per_second": 120.762, | |
| "train_steps_per_second": 15.096 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 21448, | |
| "validation_accuracy": 0.9926283813863357, | |
| "validation_f1": 0.9199760526840951, | |
| "validation_loss": 0.04548870399594307, | |
| "validation_precision": 0.911427441676552, | |
| "validation_recall": 0.9286865431103949, | |
| "validation_runtime": 6.1777, | |
| "validation_samples_per_second": 884.309, | |
| "validation_steps_per_second": 110.559 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 21448, | |
| "test_accuracy": 0.9926283813863357, | |
| "test_f1": 0.9199760526840951, | |
| "test_loss": 0.04548870399594307, | |
| "test_precision": 0.911427441676552, | |
| "test_recall": 0.9286865431103949, | |
| "test_runtime": 6.2499, | |
| "test_samples_per_second": 874.087, | |
| "test_steps_per_second": 109.281 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 21448, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2300195917669620.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |