{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9872122762148337,
  "eval_steps": 500,
  "global_step": 585,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05115089514066496,
      "grad_norm": 0.8142303824424744,
      "learning_rate": 0.00019985583705641418,
      "loss": 1.7207,
      "step": 10
    },
    {
      "epoch": 0.10230179028132992,
      "grad_norm": 0.29472672939300537,
      "learning_rate": 0.0001994237638847428,
      "loss": 0.4573,
      "step": 20
    },
    {
      "epoch": 0.1534526854219949,
      "grad_norm": 0.47930070757865906,
      "learning_rate": 0.00019870502626379127,
      "loss": 0.3752,
      "step": 30
    },
    {
      "epoch": 0.20460358056265984,
      "grad_norm": 0.5465103387832642,
      "learning_rate": 0.00019770169650018172,
      "loss": 0.3156,
      "step": 40
    },
    {
      "epoch": 0.2557544757033248,
      "grad_norm": 0.10476361960172653,
      "learning_rate": 0.00019641666745335624,
      "loss": 0.2775,
      "step": 50
    },
    {
      "epoch": 0.3069053708439898,
      "grad_norm": 0.11213192343711853,
      "learning_rate": 0.00019485364419471454,
      "loss": 0.268,
      "step": 60
    },
    {
      "epoch": 0.35805626598465473,
      "grad_norm": 0.11295895278453827,
      "learning_rate": 0.00019301713332493386,
      "loss": 0.2658,
      "step": 70
    },
    {
      "epoch": 0.4092071611253197,
      "grad_norm": 0.10884763300418854,
      "learning_rate": 0.0001909124299802724,
      "loss": 0.2612,
      "step": 80
    },
    {
      "epoch": 0.46035805626598464,
      "grad_norm": 0.11470718681812286,
      "learning_rate": 0.000188545602565321,
      "loss": 0.2551,
      "step": 90
    },
    {
      "epoch": 0.5115089514066496,
      "grad_norm": 0.1919143944978714,
      "learning_rate": 0.0001859234752562217,
      "loss": 0.2528,
      "step": 100
    },
    {
      "epoch": 0.5626598465473146,
      "grad_norm": 0.09824435412883759,
      "learning_rate": 0.00018305360832480117,
      "loss": 0.2551,
      "step": 110
    },
    {
      "epoch": 0.6138107416879796,
      "grad_norm": 0.10242178291082382,
      "learning_rate": 0.00017994427634035015,
      "loss": 0.2494,
      "step": 120
    },
    {
      "epoch": 0.6649616368286445,
      "grad_norm": 0.1205296516418457,
      "learning_rate": 0.0001766044443118978,
      "loss": 0.2476,
      "step": 130
    },
    {
      "epoch": 0.7161125319693095,
      "grad_norm": 0.09416373819112778,
      "learning_rate": 0.00017304374183977033,
      "loss": 0.247,
      "step": 140
    },
    {
      "epoch": 0.7672634271099744,
      "grad_norm": 0.11074596643447876,
      "learning_rate": 0.00016927243535095997,
      "loss": 0.2453,
      "step": 150
    },
    {
      "epoch": 0.8184143222506394,
      "grad_norm": 0.0919942781329155,
      "learning_rate": 0.0001653013984983585,
      "loss": 0.2446,
      "step": 160
    },
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 0.08940616995096207,
      "learning_rate": 0.00016114208080920123,
      "loss": 0.2427,
      "step": 170
    },
    {
      "epoch": 0.9207161125319693,
      "grad_norm": 0.10754715651273727,
      "learning_rate": 0.00015680647467311557,
      "loss": 0.2392,
      "step": 180
    },
    {
      "epoch": 0.9718670076726342,
      "grad_norm": 0.09791406244039536,
      "learning_rate": 0.00015230708076495775,
      "loss": 0.2382,
      "step": 190
    },
    {
      "epoch": 1.020460358056266,
      "grad_norm": 0.10740767419338226,
      "learning_rate": 0.0001476568720021308,
      "loss": 0.2395,
      "step": 200
    },
    {
      "epoch": 1.0716112531969308,
      "grad_norm": 0.12122884392738342,
      "learning_rate": 0.00014286925614030542,
      "loss": 0.235,
      "step": 210
    },
    {
      "epoch": 1.1227621483375958,
      "grad_norm": 0.09706810116767883,
      "learning_rate": 0.00013795803711538966,
      "loss": 0.2345,
      "step": 220
    },
    {
      "epoch": 1.1739130434782608,
      "grad_norm": 0.08798359334468842,
      "learning_rate": 0.00013293737524320797,
      "loss": 0.2382,
      "step": 230
    },
    {
      "epoch": 1.2250639386189257,
      "grad_norm": 0.09915865212678909,
      "learning_rate": 0.0001278217463916453,
      "loss": 0.2313,
      "step": 240
    },
    {
      "epoch": 1.2762148337595907,
      "grad_norm": 0.09308797121047974,
      "learning_rate": 0.00012262590024297225,
      "loss": 0.2323,
      "step": 250
    },
    {
      "epoch": 1.3273657289002558,
      "grad_norm": 0.09695378690958023,
      "learning_rate": 0.00011736481776669306,
      "loss": 0.2308,
      "step": 260
    },
    {
      "epoch": 1.3785166240409208,
      "grad_norm": 0.09910827875137329,
      "learning_rate": 0.0001120536680255323,
      "loss": 0.2287,
      "step": 270
    },
    {
      "epoch": 1.4296675191815857,
      "grad_norm": 0.10939918458461761,
      "learning_rate": 0.00010670776443910024,
      "loss": 0.2314,
      "step": 280
    },
    {
      "epoch": 1.4808184143222507,
      "grad_norm": 0.11851569265127182,
      "learning_rate": 0.00010134252063133975,
      "loss": 0.2308,
      "step": 290
    },
    {
      "epoch": 1.5319693094629157,
      "grad_norm": 0.09513761848211288,
      "learning_rate": 9.597340598905852e-05,
      "loss": 0.2299,
      "step": 300
    },
    {
      "epoch": 1.5831202046035806,
      "grad_norm": 0.09376522153615952,
      "learning_rate": 9.061590105968208e-05,
      "loss": 0.228,
      "step": 310
    },
    {
      "epoch": 1.6342710997442456,
      "grad_norm": 0.08351844549179077,
      "learning_rate": 8.528545291682838e-05,
      "loss": 0.226,
      "step": 320
    },
    {
      "epoch": 1.6854219948849105,
      "grad_norm": 0.08564090728759766,
      "learning_rate": 7.999743062239557e-05,
      "loss": 0.2284,
      "step": 330
    },
    {
      "epoch": 1.7365728900255755,
      "grad_norm": 0.08012343943119049,
      "learning_rate": 7.476708091357782e-05,
      "loss": 0.2243,
      "step": 340
    },
    {
      "epoch": 1.7877237851662404,
      "grad_norm": 0.10309313982725143,
      "learning_rate": 6.960948424257532e-05,
      "loss": 0.2263,
      "step": 350
    },
    {
      "epoch": 1.8388746803069054,
      "grad_norm": 0.08845160156488419,
      "learning_rate": 6.453951129574644e-05,
      "loss": 0.2249,
      "step": 360
    },
    {
      "epoch": 1.8900255754475703,
      "grad_norm": 0.0904114693403244,
      "learning_rate": 5.957178011756952e-05,
      "loss": 0.2246,
      "step": 370
    },
    {
      "epoch": 1.9411764705882353,
      "grad_norm": 0.10043754428625107,
      "learning_rate": 5.472061396303629e-05,
      "loss": 0.2237,
      "step": 380
    },
    {
      "epoch": 1.9923273657289002,
      "grad_norm": 0.08460038900375366,
      "learning_rate": 5.000000000000002e-05,
      "loss": 0.2237,
      "step": 390
    },
    {
      "epoch": 2.040920716112532,
      "grad_norm": 0.09718713909387589,
      "learning_rate": 4.542354898054953e-05,
      "loss": 0.2239,
      "step": 400
    },
    {
      "epoch": 2.0920716112531967,
      "grad_norm": 0.09453165531158447,
      "learning_rate": 4.100445599768774e-05,
      "loss": 0.2207,
      "step": 410
    },
    {
      "epoch": 2.1432225063938617,
      "grad_norm": 0.08744117617607117,
      "learning_rate": 3.675546244046228e-05,
      "loss": 0.2183,
      "step": 420
    },
    {
      "epoch": 2.1943734015345266,
      "grad_norm": 0.08627672493457794,
      "learning_rate": 3.268881925724297e-05,
      "loss": 0.2215,
      "step": 430
    },
    {
      "epoch": 2.2455242966751916,
      "grad_norm": 0.08695816248655319,
      "learning_rate": 2.881625163306596e-05,
      "loss": 0.2194,
      "step": 440
    },
    {
      "epoch": 2.296675191815857,
      "grad_norm": 0.08501548320055008,
      "learning_rate": 2.514892518288988e-05,
      "loss": 0.2183,
      "step": 450
    },
    {
      "epoch": 2.3478260869565215,
      "grad_norm": 0.08836157619953156,
      "learning_rate": 2.1697413758237784e-05,
      "loss": 0.2196,
      "step": 460
    },
    {
      "epoch": 2.398976982097187,
      "grad_norm": 0.08334845304489136,
      "learning_rate": 1.8471668960045574e-05,
      "loss": 0.2197,
      "step": 470
    },
    {
      "epoch": 2.4501278772378514,
      "grad_norm": 0.09064343571662903,
      "learning_rate": 1.5480991445620542e-05,
      "loss": 0.2179,
      "step": 480
    },
    {
      "epoch": 2.501278772378517,
      "grad_norm": 0.09054526686668396,
      "learning_rate": 1.2734004112438568e-05,
      "loss": 0.2192,
      "step": 490
    },
    {
      "epoch": 2.5524296675191813,
      "grad_norm": 0.08981845527887344,
      "learning_rate": 1.0238627236098619e-05,
      "loss": 0.2179,
      "step": 500
    },
    {
      "epoch": 2.6035805626598467,
      "grad_norm": 0.09078031778335571,
      "learning_rate": 8.002055634117578e-06,
      "loss": 0.2203,
      "step": 510
    },
    {
      "epoch": 2.6547314578005117,
      "grad_norm": 0.09208182245492935,
      "learning_rate": 6.030737921409169e-06,
      "loss": 0.2188,
      "step": 520
    },
    {
      "epoch": 2.7058823529411766,
      "grad_norm": 0.0891033411026001,
      "learning_rate": 4.3303579172574885e-06,
      "loss": 0.2192,
      "step": 530
    },
    {
      "epoch": 2.7570332480818416,
      "grad_norm": 0.08560579270124435,
      "learning_rate": 2.905818257394799e-06,
      "loss": 0.22,
      "step": 540
    },
    {
      "epoch": 2.8081841432225065,
      "grad_norm": 0.08466843515634537,
      "learning_rate": 1.7612262584335237e-06,
      "loss": 0.223,
      "step": 550
    },
    {
      "epoch": 2.8593350383631715,
      "grad_norm": 0.08303829282522202,
      "learning_rate": 8.998820754091531e-07,
      "loss": 0.2164,
      "step": 560
    },
    {
      "epoch": 2.9104859335038364,
      "grad_norm": 0.08513195812702179,
      "learning_rate": 3.2426918657900704e-07,
      "loss": 0.2192,
      "step": 570
    },
    {
      "epoch": 2.9616368286445014,
      "grad_norm": 0.09391626715660095,
      "learning_rate": 3.60472329114625e-08,
      "loss": 0.2209,
      "step": 580
    }
  ],
  "logging_steps": 10,
  "max_steps": 585,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1181893221660754e+18,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}