{
  "best_global_step": 90,
  "best_metric": 255.55859375,
  "best_model_checkpoint": "/home/notebook/code/group/eason/ms-swift/qwen2.5_72b_swift_allen/v1-20251030-150430/checkpoint-90",
  "epoch": 2.040920716112532,
  "eval_steps": 10,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.020460358056265986,
      "grad_norm": 8.740862846374512,
      "learning_rate": 1.25e-06,
      "loss": 1.323974609375,
      "step": 1,
      "token_acc": 0.6749949473048041
    },
    {
      "epoch": 0.04092071611253197,
      "grad_norm": 8.77861499786377,
      "learning_rate": 2.5e-06,
      "loss": 1.3330078125,
      "step": 2,
      "token_acc": 0.673973689521971
    },
    {
      "epoch": 0.061381074168797956,
      "grad_norm": 6.631825923919678,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 1.296875,
      "step": 3,
      "token_acc": 0.6779223915870493
    },
    {
      "epoch": 0.08184143222506395,
      "grad_norm": 3.3304812908172607,
      "learning_rate": 5e-06,
      "loss": 1.2734375,
      "step": 4,
      "token_acc": 0.6801284548421133
    },
    {
      "epoch": 0.10230179028132992,
      "grad_norm": 2.9929847717285156,
      "learning_rate": 6.25e-06,
      "loss": 1.2674560546875,
      "step": 5,
      "token_acc": 0.6774330054044837
    },
    {
      "epoch": 0.12276214833759591,
      "grad_norm": 3.2737808227539062,
      "learning_rate": 7.500000000000001e-06,
      "loss": 1.24853515625,
      "step": 6,
      "token_acc": 0.6828065892133894
    },
    {
      "epoch": 0.1432225063938619,
      "grad_norm": 3.864370107650757,
      "learning_rate": 8.750000000000001e-06,
      "loss": 1.2008056640625,
      "step": 7,
      "token_acc": 0.6894752982753333
    },
    {
      "epoch": 0.1636828644501279,
      "grad_norm": 3.059185743331909,
      "learning_rate": 1e-05,
      "loss": 1.16259765625,
      "step": 8,
      "token_acc": 0.695852402365021
    },
    {
      "epoch": 0.18414322250639387,
      "grad_norm": 2.160026788711548,
      "learning_rate": 9.99872299773906e-06,
      "loss": 1.1551513671875,
      "step": 9,
      "token_acc": 0.696011262965415
    },
    {
      "epoch": 0.20460358056265984,
      "grad_norm": 1.7391501665115356,
      "learning_rate": 9.994892643250147e-06,
      "loss": 1.102783203125,
      "step": 10,
      "token_acc": 0.7084828844476136
    },
    {
      "epoch": 0.20460358056265984,
      "eval_loss": 318.328125,
      "eval_runtime": 174.7551,
      "eval_samples_per_second": 0.086,
      "eval_steps_per_second": 0.006,
      "eval_token_acc": 0.714127394915891,
      "step": 10
    },
    {
      "epoch": 0.22506393861892582,
      "grad_norm": 1.512490153312683,
      "learning_rate": 9.9885108930818e-06,
      "loss": 1.059814453125,
      "step": 11,
      "token_acc": 0.7156657958041264
    },
    {
      "epoch": 0.24552429667519182,
      "grad_norm": 1.2666767835617065,
      "learning_rate": 9.979581007037776e-06,
      "loss": 1.0445556640625,
      "step": 12,
      "token_acc": 0.7163779567590186
    },
    {
      "epoch": 0.2659846547314578,
      "grad_norm": 1.035067081451416,
      "learning_rate": 9.968107546511942e-06,
      "loss": 1.0322265625,
      "step": 13,
      "token_acc": 0.7191054482580511
    },
    {
      "epoch": 0.2864450127877238,
      "grad_norm": 1.0283350944519043,
      "learning_rate": 9.95409637215831e-06,
      "loss": 1.0177001953125,
      "step": 14,
      "token_acc": 0.7223777681726882
    },
    {
      "epoch": 0.3069053708439898,
      "grad_norm": 0.8945289254188538,
      "learning_rate": 9.937554640897414e-06,
      "loss": 0.9921875,
      "step": 15,
      "token_acc": 0.7268454397965844
    },
    {
      "epoch": 0.3273657289002558,
      "grad_norm": 0.9290580749511719,
      "learning_rate": 9.918490802260538e-06,
      "loss": 0.9925537109375,
      "step": 16,
      "token_acc": 0.7261628646104965
    },
    {
      "epoch": 0.34782608695652173,
      "grad_norm": 0.9589850902557373,
      "learning_rate": 9.896914594073703e-06,
      "loss": 0.9844970703125,
      "step": 17,
      "token_acc": 0.7282743852241678
    },
    {
      "epoch": 0.36828644501278773,
      "grad_norm": 0.8926294445991516,
      "learning_rate": 9.87283703748356e-06,
      "loss": 0.955322265625,
      "step": 18,
      "token_acc": 0.7333408249225458
    },
    {
      "epoch": 0.3887468030690537,
      "grad_norm": 0.7655003666877747,
      "learning_rate": 9.846270431327793e-06,
      "loss": 0.97998046875,
      "step": 19,
      "token_acc": 0.7289209223794986
    },
    {
      "epoch": 0.4092071611253197,
      "grad_norm": 0.8207703828811646,
      "learning_rate": 9.817228345852853e-06,
      "loss": 0.9736328125,
      "step": 20,
      "token_acc": 0.7293907637079884
    },
    {
      "epoch": 0.4092071611253197,
      "eval_loss": 282.9296875,
      "eval_runtime": 184.9183,
      "eval_samples_per_second": 0.081,
      "eval_steps_per_second": 0.005,
      "eval_token_acc": 0.7367637851149977,
      "step": 20
    },
    {
      "epoch": 0.4296675191815857,
      "grad_norm": 0.899739682674408,
      "learning_rate": 9.785725615782262e-06,
      "loss": 0.951171875,
      "step": 21,
      "token_acc": 0.7350280185329514
    },
    {
      "epoch": 0.45012787723785164,
      "grad_norm": 0.7847328186035156,
      "learning_rate": 9.751778332739033e-06,
      "loss": 0.955322265625,
      "step": 22,
      "token_acc": 0.7339763941093482
    },
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 0.8828109502792358,
      "learning_rate": 9.715403837026046e-06,
      "loss": 0.928466796875,
      "step": 23,
      "token_acc": 0.7396126780013036
    },
    {
      "epoch": 0.49104859335038364,
      "grad_norm": 0.9227666258811951,
      "learning_rate": 9.676620708768608e-06,
      "loss": 0.940673828125,
      "step": 24,
      "token_acc": 0.7378745153330983
    },
    {
      "epoch": 0.5115089514066496,
      "grad_norm": 0.6232196092605591,
      "learning_rate": 9.635448758423703e-06,
      "loss": 0.9249267578125,
      "step": 25,
      "token_acc": 0.7404127180940572
    },
    {
      "epoch": 0.5319693094629157,
      "grad_norm": 0.7773280143737793,
      "learning_rate": 9.591909016660806e-06,
      "loss": 0.9281005859375,
      "step": 26,
      "token_acc": 0.7391337224366917
    },
    {
      "epoch": 0.5524296675191815,
      "grad_norm": 0.63581383228302,
      "learning_rate": 9.546023723619387e-06,
      "loss": 0.9176025390625,
      "step": 27,
      "token_acc": 0.7424947635543382
    },
    {
      "epoch": 0.5728900255754475,
      "grad_norm": 0.7504338622093201,
      "learning_rate": 9.497816317548625e-06,
      "loss": 0.9302978515625,
      "step": 28,
      "token_acc": 0.7391818850545303
    },
    {
      "epoch": 0.5933503836317136,
      "grad_norm": 0.5946778655052185,
      "learning_rate": 9.447311422835141e-06,
      "loss": 0.9202880859375,
      "step": 29,
      "token_acc": 0.7407723631931363
    },
    {
      "epoch": 0.6138107416879796,
      "grad_norm": 0.787707507610321,
      "learning_rate": 9.39453483742483e-06,
      "loss": 0.911865234375,
      "step": 30,
      "token_acc": 0.7428029623155381
    },
    {
      "epoch": 0.6138107416879796,
      "eval_loss": 271.0625,
      "eval_runtime": 183.1288,
      "eval_samples_per_second": 0.082,
      "eval_steps_per_second": 0.005,
      "eval_token_acc": 0.7445882205618399,
      "step": 30
    },
    {
      "epoch": 0.6342710997442456,
      "grad_norm": 0.607540488243103,
      "learning_rate": 9.33951351964525e-06,
      "loss": 0.9012451171875,
      "step": 31,
      "token_acc": 0.745131512392514
    },
    {
      "epoch": 0.6547314578005116,
      "grad_norm": 0.7581419348716736,
      "learning_rate": 9.28227557443528e-06,
      "loss": 0.921875,
      "step": 32,
      "token_acc": 0.739635471164067
    },
    {
      "epoch": 0.6751918158567775,
      "grad_norm": 0.5728419423103333,
      "learning_rate": 9.222850238989104e-06,
      "loss": 0.91064453125,
      "step": 33,
      "token_acc": 0.7417856423282375
    },
    {
      "epoch": 0.6956521739130435,
      "grad_norm": 0.7030539512634277,
      "learning_rate": 9.161267867821802e-06,
      "loss": 0.90576171875,
      "step": 34,
      "token_acc": 0.7440411485273096
    },
    {
      "epoch": 0.7161125319693095,
      "grad_norm": 0.7187775373458862,
      "learning_rate": 9.097559917264268e-06,
      "loss": 0.915283203125,
      "step": 35,
      "token_acc": 0.7416095001092865
    },
    {
      "epoch": 0.7365728900255755,
      "grad_norm": 1.2131768465042114,
      "learning_rate": 9.031758929395259e-06,
      "loss": 0.88720703125,
      "step": 36,
      "token_acc": 0.7491847812640715
    },
    {
      "epoch": 0.7570332480818415,
      "grad_norm": 0.6976324319839478,
      "learning_rate": 8.963898515418885e-06,
      "loss": 0.910400390625,
      "step": 37,
      "token_acc": 0.742646390581947
    },
    {
      "epoch": 0.7774936061381074,
      "grad_norm": 0.6271430253982544,
      "learning_rate": 8.89401333849598e-06,
      "loss": 0.8946533203125,
      "step": 38,
      "token_acc": 0.7461849707519417
    },
    {
      "epoch": 0.7979539641943734,
      "grad_norm": 0.9629178643226624,
      "learning_rate": 8.82213909603812e-06,
      "loss": 0.891357421875,
      "step": 39,
      "token_acc": 0.7467142339485449
    },
    {
      "epoch": 0.8184143222506394,
      "grad_norm": 0.5696749091148376,
      "learning_rate": 8.748312501473351e-06,
      "loss": 0.889404296875,
      "step": 40,
      "token_acc": 0.7480125858101083
    },
    {
      "epoch": 0.8184143222506394,
      "eval_loss": 264.984375,
      "eval_runtime": 182.0545,
      "eval_samples_per_second": 0.082,
      "eval_steps_per_second": 0.005,
      "eval_token_acc": 0.7485849647284718,
      "step": 40
    },
    {
      "epoch": 0.8388746803069054,
      "grad_norm": 0.6953923106193542,
      "learning_rate": 8.672571265492944e-06,
      "loss": 0.904296875,
      "step": 41,
      "token_acc": 0.7444278902019779
    },
    {
      "epoch": 0.8593350383631714,
      "grad_norm": 0.6765785217285156,
      "learning_rate": 8.594954076788736e-06,
      "loss": 0.891845703125,
      "step": 42,
      "token_acc": 0.7476815443943472
    },
    {
      "epoch": 0.8797953964194374,
      "grad_norm": 0.7245502471923828,
      "learning_rate": 8.515500582290914e-06,
      "loss": 0.890625,
      "step": 43,
      "token_acc": 0.7461978035518999
    },
    {
      "epoch": 0.9002557544757033,
      "grad_norm": 0.5907047986984253,
      "learning_rate": 8.434251366916323e-06,
      "loss": 0.9033203125,
      "step": 44,
      "token_acc": 0.7438690767483638
    },
    {
      "epoch": 0.9207161125319693,
      "grad_norm": 0.6903477311134338,
      "learning_rate": 8.351247932837655e-06,
      "loss": 0.894775390625,
      "step": 45,
      "token_acc": 0.745345279252677
    },
    {
      "epoch": 0.9411764705882353,
      "grad_norm": 0.6156295537948608,
      "learning_rate": 8.266532678284103e-06,
      "loss": 0.8800048828125,
      "step": 46,
      "token_acc": 0.7496958725344752
    },
    {
      "epoch": 0.9616368286445013,
      "grad_norm": 0.6671141982078552,
      "learning_rate": 8.18014887588431e-06,
      "loss": 0.883544921875,
      "step": 47,
      "token_acc": 0.7479351354819822
    },
    {
      "epoch": 0.9820971867007673,
      "grad_norm": 0.5917587280273438,
      "learning_rate": 8.092140650562665e-06,
      "loss": 0.882080078125,
      "step": 48,
      "token_acc": 0.7484929482805501
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.930376410484314,
      "learning_rate": 8.002552957000254e-06,
      "loss": 0.8798828125,
      "step": 49,
      "token_acc": 0.7485103827283421
    },
    {
      "epoch": 1.020460358056266,
      "grad_norm": 0.7340478301048279,
      "learning_rate": 7.911431556671967e-06,
      "loss": 0.8404541015625,
      "step": 50,
      "token_acc": 0.7561591178820095
    },
    {
      "epoch": 1.020460358056266,
      "eval_loss": 261.390625,
      "eval_runtime": 186.3503,
      "eval_samples_per_second": 0.08,
      "eval_steps_per_second": 0.005,
      "eval_token_acc": 0.7507617815252328,
      "step": 50
    },
    {
      "epoch": 1.040920716112532,
      "grad_norm": 0.6937265396118164,
      "learning_rate": 7.818822994471504e-06,
      "loss": 0.8221435546875,
      "step": 51,
      "token_acc": 0.7600091509973993
    },
    {
      "epoch": 1.061381074168798,
      "grad_norm": 0.6445659399032593,
      "learning_rate": 7.72477457493619e-06,
      "loss": 0.798095703125,
      "step": 52,
      "token_acc": 0.7666009535619558
    },
    {
      "epoch": 1.081841432225064,
      "grad_norm": 0.6034528613090515,
      "learning_rate": 7.629334338083774e-06,
      "loss": 0.8121337890625,
      "step": 53,
      "token_acc": 0.761754260042804
    },
    {
      "epoch": 1.10230179028133,
      "grad_norm": 0.7658072710037231,
      "learning_rate": 7.532551034873558e-06,
      "loss": 0.8314208984375,
      "step": 54,
      "token_acc": 0.7583218819656938
    },
    {
      "epoch": 1.1227621483375958,
      "grad_norm": 0.5790229439735413,
      "learning_rate": 7.43447410230435e-06,
      "loss": 0.81494140625,
      "step": 55,
      "token_acc": 0.7614948252002275
    },
    {
      "epoch": 1.143222506393862,
      "grad_norm": 0.6969874501228333,
      "learning_rate": 7.335153638162005e-06,
      "loss": 0.80810546875,
      "step": 56,
      "token_acc": 0.7627752172619252
    },
    {
      "epoch": 1.1636828644501278,
      "grad_norm": 0.6890274286270142,
      "learning_rate": 7.234640375429427e-06,
      "loss": 0.7890625,
      "step": 57,
      "token_acc": 0.7680120601871605
    },
    {
      "epoch": 1.184143222506394,
      "grad_norm": 0.8471683859825134,
      "learning_rate": 7.132985656372126e-06,
      "loss": 0.7908935546875,
      "step": 58,
      "token_acc": 0.7681585540637447
    },
    {
      "epoch": 1.2046035805626598,
      "grad_norm": 0.5586804747581482,
      "learning_rate": 7.030241406312528e-06,
      "loss": 0.7999267578125,
      "step": 59,
      "token_acc": 0.7652726739906083
    },
    {
      "epoch": 1.2250639386189257,
      "grad_norm": 0.6501573324203491,
      "learning_rate": 6.926460107106483e-06,
      "loss": 0.8023681640625,
      "step": 60,
      "token_acc": 0.7641969833563484
    },
    {
      "epoch": 1.2250639386189257,
      "eval_loss": 260.03515625,
      "eval_runtime": 174.7319,
      "eval_samples_per_second": 0.086,
      "eval_steps_per_second": 0.006,
      "eval_token_acc": 0.7517990566431523,
      "step": 60
    },
    {
      "epoch": 1.2455242966751918,
      "grad_norm": 0.67079758644104,
      "learning_rate": 6.8216947703354815e-06,
      "loss": 0.80908203125,
      "step": 61,
      "token_acc": 0.762754462206252
    },
    {
      "epoch": 1.265984654731458,
      "grad_norm": 0.8003351092338562,
      "learning_rate": 6.715998910228296e-06,
      "loss": 0.822021484375,
      "step": 62,
      "token_acc": 0.7596542081982427
    },
    {
      "epoch": 1.2864450127877238,
      "grad_norm": 0.5755249857902527,
      "learning_rate": 6.609426516325859e-06,
      "loss": 0.7877197265625,
      "step": 63,
      "token_acc": 0.7665187875244992
    },
    {
      "epoch": 1.3069053708439897,
      "grad_norm": 0.5514203310012817,
      "learning_rate": 6.502032025903356e-06,
      "loss": 0.79248046875,
      "step": 64,
      "token_acc": 0.7668143054654905
    },
    {
      "epoch": 1.3273657289002558,
      "grad_norm": 0.5459880828857422,
      "learning_rate": 6.393870296163616e-06,
      "loss": 0.794677734375,
      "step": 65,
      "token_acc": 0.7654924274646578
    },
    {
      "epoch": 1.3478260869565217,
      "grad_norm": 0.7557441592216492,
      "learning_rate": 6.284996576216014e-06,
      "loss": 0.8095703125,
      "step": 66,
      "token_acc": 0.7632387915441781
    },
    {
      "epoch": 1.3682864450127878,
      "grad_norm": 0.5115758776664734,
      "learning_rate": 6.175466478855161e-06,
      "loss": 0.787109375,
      "step": 67,
      "token_acc": 0.7676507146997723
    },
    {
      "epoch": 1.3887468030690537,
      "grad_norm": 0.6818933486938477,
      "learning_rate": 6.065335952153846e-06,
      "loss": 0.7919921875,
      "step": 68,
      "token_acc": 0.7656047815638671
    },
    {
      "epoch": 1.4092071611253196,
      "grad_norm": 0.5157542824745178,
      "learning_rate": 5.954661250884704e-06,
      "loss": 0.7918701171875,
      "step": 69,
      "token_acc": 0.7667713340544047
    },
    {
      "epoch": 1.4296675191815857,
      "grad_norm": 0.5024055242538452,
      "learning_rate": 5.843498907785236e-06,
      "loss": 0.7869873046875,
      "step": 70,
      "token_acc": 0.767566587893184
    },
    {
      "epoch": 1.4296675191815857,
      "eval_loss": 258.23828125,
      "eval_runtime": 190.6896,
      "eval_samples_per_second": 0.079,
      "eval_steps_per_second": 0.005,
      "eval_token_acc": 0.7528071127436657,
      "step": 70
    },
    {
      "epoch": 1.4501278772378516,
      "grad_norm": 0.541907548904419,
      "learning_rate": 5.731905704680834e-06,
      "loss": 0.799072265625,
      "step": 71,
      "token_acc": 0.7636709487617457
    },
    {
      "epoch": 1.4705882352941178,
      "grad_norm": 0.5258617997169495,
      "learning_rate": 5.6199386434805615e-06,
      "loss": 0.7833251953125,
      "step": 72,
      "token_acc": 0.7686619982839542
    },
    {
      "epoch": 1.4910485933503836,
      "grad_norm": 0.7432481646537781,
      "learning_rate": 5.507654917060541e-06,
      "loss": 0.8057861328125,
      "step": 73,
      "token_acc": 0.7638059078138052
    },
    {
      "epoch": 1.5115089514066495,
      "grad_norm": 0.5650312900543213,
      "learning_rate": 5.395111880049775e-06,
      "loss": 0.7869873046875,
      "step": 74,
      "token_acc": 0.7681778820079445
    },
    {
      "epoch": 1.5319693094629157,
      "grad_norm": 1.214871883392334,
      "learning_rate": 5.28236701953335e-06,
      "loss": 0.8092041015625,
      "step": 75,
      "token_acc": 0.7620805749476309
    },
    {
      "epoch": 1.5524296675191815,
      "grad_norm": 0.5642526745796204,
      "learning_rate": 5.169477925687981e-06,
      "loss": 0.776611328125,
      "step": 76,
      "token_acc": 0.7701183258952242
    },
    {
      "epoch": 1.5728900255754477,
      "grad_norm": 0.4626403748989105,
      "learning_rate": 5.0565022623649e-06,
      "loss": 0.8040771484375,
      "step": 77,
      "token_acc": 0.7638591574550989
    },
    {
      "epoch": 1.5933503836317136,
      "grad_norm": 0.5212917923927307,
      "learning_rate": 4.943497737635103e-06,
      "loss": 0.807861328125,
      "step": 78,
      "token_acc": 0.7629239401114802
    },
    {
      "epoch": 1.6138107416879794,
      "grad_norm": 0.5400001406669617,
      "learning_rate": 4.830522074312019e-06,
      "loss": 0.797119140625,
      "step": 79,
      "token_acc": 0.7648081896745939
    },
    {
      "epoch": 1.6342710997442456,
      "grad_norm": 0.5016899108886719,
      "learning_rate": 4.717632980466652e-06,
      "loss": 0.7860107421875,
      "step": 80,
      "token_acc": 0.7684027888411594
    },
    {
      "epoch": 1.6342710997442456,
      "eval_loss": 256.85546875,
      "eval_runtime": 186.06,
      "eval_samples_per_second": 0.081,
      "eval_steps_per_second": 0.005,
      "eval_token_acc": 0.754155361689694,
      "step": 80
    },
    {
      "epoch": 1.6547314578005117,
      "grad_norm": 0.5642063617706299,
      "learning_rate": 4.6048881199502265e-06,
      "loss": 0.7830810546875,
      "step": 81,
      "token_acc": 0.7678564181548047
    },
    {
      "epoch": 1.6751918158567776,
      "grad_norm": 0.4916647970676422,
      "learning_rate": 4.49234508293946e-06,
      "loss": 0.779052734375,
      "step": 82,
      "token_acc": 0.77010977208347
    },
    {
      "epoch": 1.6956521739130435,
      "grad_norm": 0.6295871138572693,
      "learning_rate": 4.38006135651944e-06,
      "loss": 0.8001708984375,
      "step": 83,
      "token_acc": 0.7653163568544067
    },
    {
      "epoch": 1.7161125319693094,
      "grad_norm": 0.4934154450893402,
      "learning_rate": 4.268094295319167e-06,
      "loss": 0.794677734375,
      "step": 84,
      "token_acc": 0.7658125406456339
    },
    {
      "epoch": 1.7365728900255755,
      "grad_norm": 0.4807905852794647,
      "learning_rate": 4.1565010922147644e-06,
      "loss": 0.8067626953125,
      "step": 85,
      "token_acc": 0.7628501378393466
    },
    {
      "epoch": 1.7570332480818416,
      "grad_norm": 0.4914467930793762,
      "learning_rate": 4.045338749115299e-06,
      "loss": 0.7962646484375,
      "step": 86,
      "token_acc": 0.7652487208210966
    },
    {
      "epoch": 1.7774936061381075,
      "grad_norm": 0.46617603302001953,
      "learning_rate": 3.934664047846157e-06,
      "loss": 0.78271484375,
      "step": 87,
      "token_acc": 0.7677687034999344
    },
    {
      "epoch": 1.7979539641943734,
      "grad_norm": 0.535650908946991,
      "learning_rate": 3.8245335211448404e-06,
      "loss": 0.7938232421875,
      "step": 88,
      "token_acc": 0.766091786543743
    },
    {
      "epoch": 1.8184143222506393,
      "grad_norm": 0.47340837121009827,
      "learning_rate": 3.715003423783986e-06,
      "loss": 0.8013916015625,
      "step": 89,
      "token_acc": 0.7640529262026424
    },
    {
      "epoch": 1.8388746803069054,
      "grad_norm": 0.6464864611625671,
      "learning_rate": 3.6061297038363853e-06,
      "loss": 0.80810546875,
      "step": 90,
      "token_acc": 0.7632898394951272
    },
    {
      "epoch": 1.8388746803069054,
      "eval_loss": 255.55859375,
      "eval_runtime": 180.9095,
      "eval_samples_per_second": 0.083,
      "eval_steps_per_second": 0.006,
      "eval_token_acc": 0.7548754017614894,
      "step": 90
    },
    {
      "epoch": 1.8593350383631715,
      "grad_norm": 0.6239431500434875,
      "learning_rate": 3.497967974096647e-06,
      "loss": 0.800537109375,
      "step": 91,
      "token_acc": 0.763830056360141
    },
    {
      "epoch": 1.8797953964194374,
      "grad_norm": 0.45245441794395447,
      "learning_rate": 3.3905734836741415e-06,
      "loss": 0.8072509765625,
      "step": 92,
      "token_acc": 0.7629841640196129
    },
    {
      "epoch": 1.9002557544757033,
      "grad_norm": 0.5149667263031006,
      "learning_rate": 3.2840010897717045e-06,
      "loss": 0.7896728515625,
      "step": 93,
      "token_acc": 0.7672215188664161
    },
    {
      "epoch": 1.9207161125319692,
      "grad_norm": 0.4640462398529053,
      "learning_rate": 3.178305229664519e-06,
      "loss": 0.802978515625,
      "step": 94,
      "token_acc": 0.763835117063376
    },
    {
      "epoch": 1.9411764705882353,
      "grad_norm": 0.45892465114593506,
      "learning_rate": 3.073539892893519e-06,
      "loss": 0.7943115234375,
      "step": 95,
      "token_acc": 0.7657574351900455
    },
    {
      "epoch": 1.9616368286445014,
      "grad_norm": 0.47301074862480164,
      "learning_rate": 2.969758593687475e-06,
      "loss": 0.7738037109375,
      "step": 96,
      "token_acc": 0.7718963916631811
    },
    {
      "epoch": 1.9820971867007673,
      "grad_norm": 0.551275372505188,
      "learning_rate": 2.8670143436278757e-06,
      "loss": 0.7822265625,
      "step": 97,
      "token_acc": 0.7684924397079685
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.4965466260910034,
      "learning_rate": 2.765359624570574e-06,
      "loss": 0.76708984375,
      "step": 98,
      "token_acc": 0.7723498958825589
    },
    {
      "epoch": 2.020460358056266,
      "grad_norm": 0.6020316481590271,
      "learning_rate": 2.664846361837997e-06,
      "loss": 0.7359619140625,
      "step": 99,
      "token_acc": 0.779019540556512
    },
    {
      "epoch": 2.040920716112532,
      "grad_norm": 0.5324369668960571,
      "learning_rate": 2.565525897695651e-06,
      "loss": 0.7454833984375,
      "step": 100,
      "token_acc": 0.7767212030542502
    },
    {
      "epoch": 2.040920716112532,
      "eval_loss": 256.80859375,
      "eval_runtime": 176.504,
      "eval_samples_per_second": 0.085,
      "eval_steps_per_second": 0.006,
      "eval_token_acc": 0.754800267145302,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 147,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.812412281074483e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}