{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 5187,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00963948332369385,
      "grad_norm": 6.363353729248047,
      "learning_rate": 4.9527665317139e-05,
      "loss": 5.7207,
      "step": 50
    },
    {
      "epoch": 0.0192789666473877,
      "grad_norm": 6.101255416870117,
      "learning_rate": 4.904569115095431e-05,
      "loss": 4.3487,
      "step": 100
    },
    {
      "epoch": 0.02891844997108155,
      "grad_norm": 5.563638210296631,
      "learning_rate": 4.856371698476962e-05,
      "loss": 4.077,
      "step": 150
    },
    {
      "epoch": 0.0385579332947754,
      "grad_norm": 5.894318580627441,
      "learning_rate": 4.8081742818584925e-05,
      "loss": 3.8687,
      "step": 200
    },
    {
      "epoch": 0.04819741661846925,
      "grad_norm": 5.281033992767334,
      "learning_rate": 4.7599768652400236e-05,
      "loss": 3.698,
      "step": 250
    },
    {
      "epoch": 0.0578368999421631,
      "grad_norm": 5.9448089599609375,
      "learning_rate": 4.711779448621554e-05,
      "loss": 3.5277,
      "step": 300
    },
    {
      "epoch": 0.06747638326585695,
      "grad_norm": 5.122015953063965,
      "learning_rate": 4.6635820320030846e-05,
      "loss": 3.524,
      "step": 350
    },
    {
      "epoch": 0.0771158665895508,
      "grad_norm": 5.508582592010498,
      "learning_rate": 4.615384615384616e-05,
      "loss": 3.3969,
      "step": 400
    },
    {
      "epoch": 0.08675534991324466,
      "grad_norm": 4.6635661125183105,
      "learning_rate": 4.567187198766146e-05,
      "loss": 3.4104,
      "step": 450
    },
    {
      "epoch": 0.0963948332369385,
      "grad_norm": 4.993603706359863,
      "learning_rate": 4.5189897821476775e-05,
      "loss": 3.317,
      "step": 500
    },
    {
      "epoch": 0.10603431656063235,
      "grad_norm": 4.281131267547607,
      "learning_rate": 4.470792365529208e-05,
      "loss": 3.27,
      "step": 550
    },
    {
      "epoch": 0.1156737998843262,
      "grad_norm": 4.340400218963623,
      "learning_rate": 4.4225949489107385e-05,
      "loss": 3.2469,
      "step": 600
    },
    {
      "epoch": 0.12531328320802004,
      "grad_norm": 4.594860076904297,
      "learning_rate": 4.374397532292269e-05,
      "loss": 3.1855,
      "step": 650
    },
    {
      "epoch": 0.1349527665317139,
      "grad_norm": 4.50878381729126,
      "learning_rate": 4.3262001156738e-05,
      "loss": 3.1646,
      "step": 700
    },
    {
      "epoch": 0.14459224985540775,
      "grad_norm": 4.4101715087890625,
      "learning_rate": 4.278002699055331e-05,
      "loss": 3.0478,
      "step": 750
    },
    {
      "epoch": 0.1542317331791016,
      "grad_norm": 4.285751819610596,
      "learning_rate": 4.229805282436862e-05,
      "loss": 3.0654,
      "step": 800
    },
    {
      "epoch": 0.16387121650279546,
      "grad_norm": 4.346066474914551,
      "learning_rate": 4.1816078658183924e-05,
      "loss": 3.1125,
      "step": 850
    },
    {
      "epoch": 0.1735106998264893,
      "grad_norm": 4.069290637969971,
      "learning_rate": 4.133410449199923e-05,
      "loss": 3.0389,
      "step": 900
    },
    {
      "epoch": 0.18315018315018314,
      "grad_norm": 4.5510382652282715,
      "learning_rate": 4.0852130325814534e-05,
      "loss": 2.9808,
      "step": 950
    },
    {
      "epoch": 0.192789666473877,
      "grad_norm": 4.202208995819092,
      "learning_rate": 4.0370156159629845e-05,
      "loss": 3.0209,
      "step": 1000
    },
    {
      "epoch": 0.20242914979757085,
      "grad_norm": 5.317561149597168,
      "learning_rate": 3.988818199344515e-05,
      "loss": 3.0355,
      "step": 1050
    },
    {
      "epoch": 0.2120686331212647,
      "grad_norm": 4.339507102966309,
      "learning_rate": 3.940620782726046e-05,
      "loss": 2.9353,
      "step": 1100
    },
    {
      "epoch": 0.22170811644495855,
      "grad_norm": 4.036981105804443,
      "learning_rate": 3.8924233661075774e-05,
      "loss": 2.9902,
      "step": 1150
    },
    {
      "epoch": 0.2313475997686524,
      "grad_norm": 4.5993757247924805,
      "learning_rate": 3.844225949489107e-05,
      "loss": 2.9957,
      "step": 1200
    },
    {
      "epoch": 0.24098708309234626,
      "grad_norm": 4.273294448852539,
      "learning_rate": 3.7960285328706384e-05,
      "loss": 2.9309,
      "step": 1250
    },
    {
      "epoch": 0.2506265664160401,
      "grad_norm": 4.0223774909973145,
      "learning_rate": 3.747831116252169e-05,
      "loss": 2.8886,
      "step": 1300
    },
    {
      "epoch": 0.26026604973973394,
      "grad_norm": 4.813283443450928,
      "learning_rate": 3.6996336996337e-05,
      "loss": 2.7496,
      "step": 1350
    },
    {
      "epoch": 0.2699055330634278,
      "grad_norm": 4.476084232330322,
      "learning_rate": 3.6514362830152306e-05,
      "loss": 2.8673,
      "step": 1400
    },
    {
      "epoch": 0.27954501638712165,
      "grad_norm": 4.051555156707764,
      "learning_rate": 3.603238866396762e-05,
      "loss": 2.8135,
      "step": 1450
    },
    {
      "epoch": 0.2891844997108155,
      "grad_norm": 4.186788558959961,
      "learning_rate": 3.5550414497782916e-05,
      "loss": 2.9019,
      "step": 1500
    },
    {
      "epoch": 0.29882398303450936,
      "grad_norm": 4.216615200042725,
      "learning_rate": 3.506844033159823e-05,
      "loss": 2.8877,
      "step": 1550
    },
    {
      "epoch": 0.3084634663582032,
      "grad_norm": 4.653785705566406,
      "learning_rate": 3.458646616541353e-05,
      "loss": 2.8091,
      "step": 1600
    },
    {
      "epoch": 0.31810294968189706,
      "grad_norm": 3.883335828781128,
      "learning_rate": 3.4104491999228844e-05,
      "loss": 2.7521,
      "step": 1650
    },
    {
      "epoch": 0.3277424330055909,
      "grad_norm": 4.467517375946045,
      "learning_rate": 3.362251783304415e-05,
      "loss": 2.7753,
      "step": 1700
    },
    {
      "epoch": 0.33738191632928477,
      "grad_norm": 3.839921474456787,
      "learning_rate": 3.314054366685946e-05,
      "loss": 2.7853,
      "step": 1750
    },
    {
      "epoch": 0.3470213996529786,
      "grad_norm": 3.923483371734619,
      "learning_rate": 3.2658569500674766e-05,
      "loss": 2.872,
      "step": 1800
    },
    {
      "epoch": 0.3566608829766725,
      "grad_norm": 4.523361682891846,
      "learning_rate": 3.217659533449007e-05,
      "loss": 2.7785,
      "step": 1850
    },
    {
      "epoch": 0.3663003663003663,
      "grad_norm": 3.865365743637085,
      "learning_rate": 3.169462116830538e-05,
      "loss": 2.8373,
      "step": 1900
    },
    {
      "epoch": 0.37593984962406013,
      "grad_norm": 3.9936673641204834,
      "learning_rate": 3.121264700212069e-05,
      "loss": 2.7323,
      "step": 1950
    },
    {
      "epoch": 0.385579332947754,
      "grad_norm": 4.1067633628845215,
      "learning_rate": 3.0730672835936e-05,
      "loss": 2.7585,
      "step": 2000
    },
    {
      "epoch": 0.39521881627144784,
      "grad_norm": 3.85208797454834,
      "learning_rate": 3.02486986697513e-05,
      "loss": 2.7564,
      "step": 2050
    },
    {
      "epoch": 0.4048582995951417,
      "grad_norm": 4.24629020690918,
      "learning_rate": 2.9766724503566613e-05,
      "loss": 2.7274,
      "step": 2100
    },
    {
      "epoch": 0.41449778291883554,
      "grad_norm": 3.905611276626587,
      "learning_rate": 2.9284750337381918e-05,
      "loss": 2.7094,
      "step": 2150
    },
    {
      "epoch": 0.4241372662425294,
      "grad_norm": 3.9592058658599854,
      "learning_rate": 2.8802776171197226e-05,
      "loss": 2.7046,
      "step": 2200
    },
    {
      "epoch": 0.43377674956622325,
      "grad_norm": 3.860285520553589,
      "learning_rate": 2.832080200501253e-05,
      "loss": 2.7154,
      "step": 2250
    },
    {
      "epoch": 0.4434162328899171,
      "grad_norm": 3.989696502685547,
      "learning_rate": 2.783882783882784e-05,
      "loss": 2.6632,
      "step": 2300
    },
    {
      "epoch": 0.45305571621361096,
      "grad_norm": 3.987741708755493,
      "learning_rate": 2.7356853672643145e-05,
      "loss": 2.7178,
      "step": 2350
    },
    {
      "epoch": 0.4626951995373048,
      "grad_norm": 3.9146411418914795,
      "learning_rate": 2.6874879506458457e-05,
      "loss": 2.7039,
      "step": 2400
    },
    {
      "epoch": 0.47233468286099867,
      "grad_norm": 4.281154155731201,
      "learning_rate": 2.639290534027376e-05,
      "loss": 2.6205,
      "step": 2450
    },
    {
      "epoch": 0.4819741661846925,
      "grad_norm": 3.6197686195373535,
      "learning_rate": 2.591093117408907e-05,
      "loss": 2.723,
      "step": 2500
    },
    {
      "epoch": 0.4916136495083864,
      "grad_norm": 3.7195041179656982,
      "learning_rate": 2.5428957007904375e-05,
      "loss": 2.6071,
      "step": 2550
    },
    {
      "epoch": 0.5012531328320802,
      "grad_norm": 3.775972604751587,
      "learning_rate": 2.4946982841719683e-05,
      "loss": 2.6749,
      "step": 2600
    },
    {
      "epoch": 0.5108926161557741,
      "grad_norm": 3.9212749004364014,
      "learning_rate": 2.4465008675534992e-05,
      "loss": 2.6913,
      "step": 2650
    },
    {
      "epoch": 0.5205320994794679,
      "grad_norm": 3.9374866485595703,
      "learning_rate": 2.39830345093503e-05,
      "loss": 2.6457,
      "step": 2700
    },
    {
      "epoch": 0.5301715828031618,
      "grad_norm": 4.192444801330566,
      "learning_rate": 2.3501060343165605e-05,
      "loss": 2.7204,
      "step": 2750
    },
    {
      "epoch": 0.5398110661268556,
      "grad_norm": 3.428612232208252,
      "learning_rate": 2.3019086176980914e-05,
      "loss": 2.7221,
      "step": 2800
    },
    {
      "epoch": 0.5494505494505495,
      "grad_norm": 4.013959884643555,
      "learning_rate": 2.2537112010796222e-05,
      "loss": 2.6046,
      "step": 2850
    },
    {
      "epoch": 0.5590900327742433,
      "grad_norm": 4.100067138671875,
      "learning_rate": 2.205513784461153e-05,
      "loss": 2.6887,
      "step": 2900
    },
    {
      "epoch": 0.5687295160979372,
      "grad_norm": 3.5404722690582275,
      "learning_rate": 2.157316367842684e-05,
      "loss": 2.5933,
      "step": 2950
    },
    {
      "epoch": 0.578368999421631,
      "grad_norm": 3.6547091007232666,
      "learning_rate": 2.1091189512242147e-05,
      "loss": 2.6171,
      "step": 3000
    },
    {
      "epoch": 0.5880084827453248,
      "grad_norm": 3.81042742729187,
      "learning_rate": 2.0609215346057452e-05,
      "loss": 2.5319,
      "step": 3050
    },
    {
      "epoch": 0.5976479660690187,
      "grad_norm": 3.987117052078247,
      "learning_rate": 2.012724117987276e-05,
      "loss": 2.6596,
      "step": 3100
    },
    {
      "epoch": 0.6072874493927125,
      "grad_norm": 3.5897133350372314,
      "learning_rate": 1.964526701368807e-05,
      "loss": 2.634,
      "step": 3150
    },
    {
      "epoch": 0.6169269327164064,
      "grad_norm": 4.190171241760254,
      "learning_rate": 1.9163292847503374e-05,
      "loss": 2.5889,
      "step": 3200
    },
    {
      "epoch": 0.6265664160401002,
      "grad_norm": 3.7671003341674805,
      "learning_rate": 1.8681318681318682e-05,
      "loss": 2.6186,
      "step": 3250
    },
    {
      "epoch": 0.6362058993637941,
      "grad_norm": 4.126290798187256,
      "learning_rate": 1.819934451513399e-05,
      "loss": 2.5847,
      "step": 3300
    },
    {
      "epoch": 0.6458453826874879,
      "grad_norm": 4.023561000823975,
      "learning_rate": 1.7717370348949296e-05,
      "loss": 2.5474,
      "step": 3350
    },
    {
      "epoch": 0.6554848660111818,
      "grad_norm": 3.9225897789001465,
      "learning_rate": 1.7235396182764604e-05,
      "loss": 2.6056,
      "step": 3400
    },
    {
      "epoch": 0.6651243493348756,
      "grad_norm": 3.6160168647766113,
      "learning_rate": 1.6753422016579912e-05,
      "loss": 2.5559,
      "step": 3450
    },
    {
      "epoch": 0.6747638326585695,
      "grad_norm": 4.005686283111572,
      "learning_rate": 1.6271447850395217e-05,
      "loss": 2.5416,
      "step": 3500
    },
    {
      "epoch": 0.6844033159822633,
      "grad_norm": 3.8741414546966553,
      "learning_rate": 1.5789473684210526e-05,
      "loss": 2.5972,
      "step": 3550
    },
    {
      "epoch": 0.6940427993059572,
      "grad_norm": 3.710710048675537,
      "learning_rate": 1.5307499518025834e-05,
      "loss": 2.5787,
      "step": 3600
    },
    {
      "epoch": 0.703682282629651,
      "grad_norm": 3.460242748260498,
      "learning_rate": 1.4825525351841141e-05,
      "loss": 2.554,
      "step": 3650
    },
    {
      "epoch": 0.713321765953345,
      "grad_norm": 3.8803932666778564,
      "learning_rate": 1.4343551185656451e-05,
      "loss": 2.5613,
      "step": 3700
    },
    {
      "epoch": 0.7229612492770388,
      "grad_norm": 3.8178253173828125,
      "learning_rate": 1.3861577019471758e-05,
      "loss": 2.5626,
      "step": 3750
    },
    {
      "epoch": 0.7326007326007326,
      "grad_norm": 3.364790201187134,
      "learning_rate": 1.3379602853287066e-05,
      "loss": 2.5721,
      "step": 3800
    },
    {
      "epoch": 0.7422402159244265,
      "grad_norm": 3.5198776721954346,
      "learning_rate": 1.2897628687102373e-05,
      "loss": 2.5233,
      "step": 3850
    },
    {
      "epoch": 0.7518796992481203,
      "grad_norm": 3.782043695449829,
      "learning_rate": 1.241565452091768e-05,
      "loss": 2.541,
      "step": 3900
    },
    {
      "epoch": 0.7615191825718142,
      "grad_norm": 4.032742023468018,
      "learning_rate": 1.1933680354732988e-05,
      "loss": 2.493,
      "step": 3950
    },
    {
      "epoch": 0.771158665895508,
      "grad_norm": 3.8995766639709473,
      "learning_rate": 1.1451706188548295e-05,
      "loss": 2.5361,
      "step": 4000
    },
    {
      "epoch": 0.7807981492192019,
      "grad_norm": 3.67946457862854,
      "learning_rate": 1.0969732022363601e-05,
      "loss": 2.5408,
      "step": 4050
    },
    {
      "epoch": 0.7904376325428957,
      "grad_norm": 3.4958484172821045,
      "learning_rate": 1.048775785617891e-05,
      "loss": 2.5261,
      "step": 4100
    },
    {
      "epoch": 0.8000771158665896,
      "grad_norm": 3.8448803424835205,
      "learning_rate": 1.0005783689994216e-05,
      "loss": 2.5389,
      "step": 4150
    },
    {
      "epoch": 0.8097165991902834,
      "grad_norm": 3.7735507488250732,
      "learning_rate": 9.523809523809523e-06,
      "loss": 2.6247,
      "step": 4200
    },
    {
      "epoch": 0.8193560825139773,
      "grad_norm": 3.4487788677215576,
      "learning_rate": 9.041835357624833e-06,
      "loss": 2.5704,
      "step": 4250
    },
    {
      "epoch": 0.8289955658376711,
      "grad_norm": 3.943000316619873,
      "learning_rate": 8.55986119144014e-06,
      "loss": 2.5289,
      "step": 4300
    },
    {
      "epoch": 0.838635049161365,
      "grad_norm": 3.7842445373535156,
      "learning_rate": 8.077887025255447e-06,
      "loss": 2.5765,
      "step": 4350
    },
    {
      "epoch": 0.8482745324850588,
      "grad_norm": 3.741563320159912,
      "learning_rate": 7.595912859070754e-06,
      "loss": 2.4869,
      "step": 4400
    },
    {
      "epoch": 0.8579140158087527,
      "grad_norm": 3.6693384647369385,
      "learning_rate": 7.113938692886062e-06,
      "loss": 2.5513,
      "step": 4450
    },
    {
      "epoch": 0.8675534991324465,
      "grad_norm": 3.590758800506592,
      "learning_rate": 6.631964526701369e-06,
      "loss": 2.566,
      "step": 4500
    },
    {
      "epoch": 0.8771929824561403,
      "grad_norm": 3.282844305038452,
      "learning_rate": 6.149990360516677e-06,
      "loss": 2.5001,
      "step": 4550
    },
    {
      "epoch": 0.8868324657798342,
      "grad_norm": 4.462714195251465,
      "learning_rate": 5.668016194331984e-06,
      "loss": 2.4838,
      "step": 4600
    },
    {
      "epoch": 0.896471949103528,
      "grad_norm": 4.180957317352295,
      "learning_rate": 5.186042028147292e-06,
      "loss": 2.477,
      "step": 4650
    },
    {
      "epoch": 0.9061114324272219,
      "grad_norm": 4.506474018096924,
      "learning_rate": 4.7040678619625985e-06,
      "loss": 2.5097,
      "step": 4700
    },
    {
      "epoch": 0.9157509157509157,
      "grad_norm": 3.257143974304199,
      "learning_rate": 4.222093695777907e-06,
      "loss": 2.6252,
      "step": 4750
    },
    {
      "epoch": 0.9253903990746096,
      "grad_norm": 3.498189926147461,
      "learning_rate": 3.740119529593214e-06,
      "loss": 2.5133,
      "step": 4800
    },
    {
      "epoch": 0.9350298823983034,
      "grad_norm": 3.404567241668701,
      "learning_rate": 3.258145363408521e-06,
      "loss": 2.4482,
      "step": 4850
    },
    {
      "epoch": 0.9446693657219973,
      "grad_norm": 3.703936815261841,
      "learning_rate": 2.776171197223829e-06,
      "loss": 2.5769,
      "step": 4900
    },
    {
      "epoch": 0.9543088490456911,
      "grad_norm": 4.4313883781433105,
      "learning_rate": 2.2941970310391366e-06,
      "loss": 2.5262,
      "step": 4950
    },
    {
      "epoch": 0.963948332369385,
      "grad_norm": 3.5869264602661133,
      "learning_rate": 1.8122228648544438e-06,
      "loss": 2.5166,
      "step": 5000
    },
    {
      "epoch": 0.9735878156930788,
      "grad_norm": 3.5782413482666016,
      "learning_rate": 1.3302486986697513e-06,
      "loss": 2.4681,
      "step": 5050
    },
    {
      "epoch": 0.9832272990167727,
      "grad_norm": 3.565708637237549,
      "learning_rate": 8.482745324850588e-07,
      "loss": 2.4661,
      "step": 5100
    },
    {
      "epoch": 0.9928667823404665,
      "grad_norm": 3.5679666996002197,
      "learning_rate": 3.663003663003663e-07,
      "loss": 2.5452,
      "step": 5150
    }
  ],
  "logging_steps": 50,
  "max_steps": 5187,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1355321769984000.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}