{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9907100199071002,
  "eval_steps": 500,
  "global_step": 75000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.01, "learning_rate": 0.0004966821499668214, "loss": 1.4142, "step": 500 },
    { "epoch": 0.03, "learning_rate": 0.000493364299933643, "loss": 1.4047, "step": 1000 },
    { "epoch": 0.04, "learning_rate": 0.0004900464499004645, "loss": 1.4055, "step": 1500 },
    { "epoch": 0.05, "learning_rate": 0.000486728599867286, "loss": 1.405, "step": 2000 },
    { "epoch": 0.07, "learning_rate": 0.0004834107498341075, "loss": 1.4066, "step": 2500 },
    { "epoch": 0.08, "learning_rate": 0.000480092899800929, "loss": 1.392, "step": 3000 },
    { "epoch": 0.09, "learning_rate": 0.0004767750497677505, "loss": 1.3947, "step": 3500 },
    { "epoch": 0.11, "learning_rate": 0.000473457199734572, "loss": 1.3762, "step": 4000 },
    { "epoch": 0.12, "learning_rate": 0.0004701393497013935, "loss": 1.3958, "step": 4500 },
    { "epoch": 0.13, "learning_rate": 0.00046682149966821503, "loss": 1.3809, "step": 5000 },
    { "epoch": 0.15, "learning_rate": 0.0004635036496350365, "loss": 1.3802, "step": 5500 },
    { "epoch": 0.16, "learning_rate": 0.00046018579960185803, "loss": 1.3939, "step": 6000 },
    { "epoch": 0.17, "learning_rate": 0.0004568679495686795, "loss": 1.3897, "step": 6500 },
    { "epoch": 0.19, "learning_rate": 0.00045355009953550104, "loss": 1.3919, "step": 7000 },
    { "epoch": 0.2, "learning_rate": 0.0004502322495023225, "loss": 1.3809, "step": 7500 },
    { "epoch": 0.21, "learning_rate": 0.00044691439946914404, "loss": 1.3892, "step": 8000 },
    { "epoch": 0.23, "learning_rate": 0.0004435965494359655, "loss": 1.3906, "step": 8500 },
    { "epoch": 0.24, "learning_rate": 0.00044027869940278704, "loss": 1.3814, "step": 9000 },
    { "epoch": 0.25, "learning_rate": 0.0004369608493696085, "loss": 1.3844, "step": 9500 },
    { "epoch": 0.27, "learning_rate": 0.00043364299933643, "loss": 1.3937, "step": 10000 },
    { "epoch": 0.28, "learning_rate": 0.0004303251493032515, "loss": 1.3816, "step": 10500 },
    { "epoch": 0.29, "learning_rate": 0.000427007299270073, "loss": 1.3878, "step": 11000 },
    { "epoch": 0.31, "learning_rate": 0.0004236894492368945, "loss": 1.3933, "step": 11500 },
    { "epoch": 0.32, "learning_rate": 0.000420371599203716, "loss": 1.3822, "step": 12000 },
    { "epoch": 0.33, "learning_rate": 0.00041705374917053753, "loss": 1.3863, "step": 12500 },
    { "epoch": 0.35, "learning_rate": 0.000413735899137359, "loss": 1.4047, "step": 13000 },
    { "epoch": 0.36, "learning_rate": 0.00041041804910418053, "loss": 1.3921, "step": 13500 },
    { "epoch": 0.37, "learning_rate": 0.000407100199071002, "loss": 1.3822, "step": 14000 },
    { "epoch": 0.38, "learning_rate": 0.00040378234903782354, "loss": 1.3941, "step": 14500 },
    { "epoch": 0.4, "learning_rate": 0.000400464499004645, "loss": 1.3849, "step": 15000 },
    { "epoch": 0.41, "learning_rate": 0.00039714664897146654, "loss": 1.3709, "step": 15500 },
    { "epoch": 0.42, "learning_rate": 0.000393828798938288, "loss": 1.3679, "step": 16000 },
    { "epoch": 0.44, "learning_rate": 0.00039051094890510954, "loss": 1.3756, "step": 16500 },
    { "epoch": 0.45, "learning_rate": 0.000387193098871931, "loss": 1.397, "step": 17000 },
    { "epoch": 0.46, "learning_rate": 0.00038387524883875255, "loss": 1.4014, "step": 17500 },
    { "epoch": 0.48, "learning_rate": 0.00038055739880557397, "loss": 1.3892, "step": 18000 },
    { "epoch": 0.49, "learning_rate": 0.00037723954877239544, "loss": 1.4107, "step": 18500 },
    { "epoch": 0.5, "learning_rate": 0.00037392169873921697, "loss": 1.4083, "step": 19000 },
    { "epoch": 0.52, "learning_rate": 0.00037060384870603845, "loss": 1.3963, "step": 19500 },
    { "epoch": 0.53, "learning_rate": 0.00036728599867286, "loss": 1.3899, "step": 20000 },
    { "epoch": 0.54, "learning_rate": 0.00036396814863968145, "loss": 1.3972, "step": 20500 },
    { "epoch": 0.56, "learning_rate": 0.000360650298606503, "loss": 1.3851, "step": 21000 },
    { "epoch": 0.57, "learning_rate": 0.00035733244857332445, "loss": 1.3899, "step": 21500 },
    { "epoch": 0.58, "learning_rate": 0.000354014598540146, "loss": 1.3788, "step": 22000 },
    { "epoch": 0.6, "learning_rate": 0.00035069674850696746, "loss": 1.3806, "step": 22500 },
    { "epoch": 0.61, "learning_rate": 0.000347378898473789, "loss": 1.3829, "step": 23000 },
    { "epoch": 0.62, "learning_rate": 0.00034406104844061046, "loss": 1.3742, "step": 23500 },
    { "epoch": 0.64, "learning_rate": 0.000340743198407432, "loss": 1.381, "step": 24000 },
    { "epoch": 0.65, "learning_rate": 0.00033742534837425347, "loss": 1.3711, "step": 24500 },
    { "epoch": 0.66, "learning_rate": 0.000334107498341075, "loss": 1.3752, "step": 25000 },
    { "epoch": 0.68, "learning_rate": 0.00033078964830789647, "loss": 1.3776, "step": 25500 },
    { "epoch": 0.69, "learning_rate": 0.000327471798274718, "loss": 1.3775, "step": 26000 },
    { "epoch": 0.7, "learning_rate": 0.00032415394824153947, "loss": 1.3608, "step": 26500 },
    { "epoch": 0.72, "learning_rate": 0.000320836098208361, "loss": 1.3636, "step": 27000 },
    { "epoch": 0.73, "learning_rate": 0.0003175182481751825, "loss": 1.3684, "step": 27500 },
    { "epoch": 0.74, "learning_rate": 0.00031420039814200395, "loss": 1.3777, "step": 28000 },
    { "epoch": 0.76, "learning_rate": 0.0003108825481088255, "loss": 1.3497, "step": 28500 },
    { "epoch": 0.77, "learning_rate": 0.00030756469807564695, "loss": 1.3621, "step": 29000 },
    { "epoch": 0.78, "learning_rate": 0.0003042468480424685, "loss": 1.3717, "step": 29500 },
    { "epoch": 0.8, "learning_rate": 0.00030092899800928996, "loss": 1.3637, "step": 30000 },
    { "epoch": 0.81, "learning_rate": 0.0002976111479761115, "loss": 1.3502, "step": 30500 },
    { "epoch": 0.82, "learning_rate": 0.00029429329794293296, "loss": 1.3559, "step": 31000 },
    { "epoch": 0.84, "learning_rate": 0.0002909754479097545, "loss": 1.3595, "step": 31500 },
    { "epoch": 0.85, "learning_rate": 0.00028765759787657597, "loss": 1.3525, "step": 32000 },
    { "epoch": 0.86, "learning_rate": 0.0002843397478433975, "loss": 1.3633, "step": 32500 },
    { "epoch": 0.88, "learning_rate": 0.00028102189781021897, "loss": 1.3645, "step": 33000 },
    { "epoch": 0.89, "learning_rate": 0.0002777040477770405, "loss": 1.3665, "step": 33500 },
    { "epoch": 0.9, "learning_rate": 0.000274386197743862, "loss": 1.367, "step": 34000 },
    { "epoch": 0.92, "learning_rate": 0.0002710683477106835, "loss": 1.3482, "step": 34500 },
    { "epoch": 0.93, "learning_rate": 0.000267750497677505, "loss": 1.3663, "step": 35000 },
    { "epoch": 0.94, "learning_rate": 0.0002644326476443265, "loss": 1.3555, "step": 35500 },
    { "epoch": 0.96, "learning_rate": 0.000261114797611148, "loss": 1.3623, "step": 36000 },
    { "epoch": 0.97, "learning_rate": 0.00025779694757796946, "loss": 1.3656, "step": 36500 },
    { "epoch": 0.98, "learning_rate": 0.000254479097544791, "loss": 1.3651, "step": 37000 },
    { "epoch": 1.0, "learning_rate": 0.00025116124751161246, "loss": 1.3712, "step": 37500 },
    { "epoch": 1.01, "learning_rate": 0.000247843397478434, "loss": 1.3459, "step": 38000 },
    { "epoch": 1.02, "learning_rate": 0.00024452554744525546, "loss": 1.3334, "step": 38500 },
    { "epoch": 1.04, "learning_rate": 0.000241207697412077, "loss": 1.3231, "step": 39000 },
    { "epoch": 1.05, "learning_rate": 0.0002378898473788985, "loss": 1.3191, "step": 39500 },
    { "epoch": 1.06, "learning_rate": 0.00023457199734572, "loss": 1.3232, "step": 40000 },
    { "epoch": 1.07, "learning_rate": 0.0002312541473125415, "loss": 1.3239, "step": 40500 },
    { "epoch": 1.09, "learning_rate": 0.00022793629727936297, "loss": 1.3194, "step": 41000 },
    { "epoch": 1.1, "learning_rate": 0.00022461844724618447, "loss": 1.3074, "step": 41500 },
    { "epoch": 1.11, "learning_rate": 0.00022130059721300598, "loss": 1.3191, "step": 42000 },
    { "epoch": 1.13, "learning_rate": 0.00021798274717982748, "loss": 1.3092, "step": 42500 },
    { "epoch": 1.14, "learning_rate": 0.00021466489714664898, "loss": 1.3141, "step": 43000 },
    { "epoch": 1.15, "learning_rate": 0.00021134704711347048, "loss": 1.3247, "step": 43500 },
    { "epoch": 1.17, "learning_rate": 0.00020802919708029198, "loss": 1.3163, "step": 44000 },
    { "epoch": 1.18, "learning_rate": 0.00020471134704711349, "loss": 1.3124, "step": 44500 },
    { "epoch": 1.19, "learning_rate": 0.00020139349701393496, "loss": 1.3151, "step": 45000 },
    { "epoch": 1.21, "learning_rate": 0.00019807564698075646, "loss": 1.3062, "step": 45500 },
    { "epoch": 1.22, "learning_rate": 0.00019475779694757796, "loss": 1.3237, "step": 46000 },
    { "epoch": 1.23, "learning_rate": 0.00019143994691439947, "loss": 1.3143, "step": 46500 },
    { "epoch": 1.25, "learning_rate": 0.00018812209688122097, "loss": 1.3007, "step": 47000 },
    { "epoch": 1.26, "learning_rate": 0.00018480424684804247, "loss": 1.3142, "step": 47500 },
    { "epoch": 1.27, "learning_rate": 0.00018148639681486397, "loss": 1.3144, "step": 48000 },
    { "epoch": 1.29, "learning_rate": 0.00017816854678168547, "loss": 1.3175, "step": 48500 },
    { "epoch": 1.3, "learning_rate": 0.00017485069674850697, "loss": 1.3011, "step": 49000 },
    { "epoch": 1.31, "learning_rate": 0.00017153284671532848, "loss": 1.3161, "step": 49500 },
    { "epoch": 1.33, "learning_rate": 0.00016821499668214995, "loss": 1.295, "step": 50000 },
    { "epoch": 1.34, "learning_rate": 0.00016489714664897145, "loss": 1.3161, "step": 50500 },
    { "epoch": 1.35, "learning_rate": 0.00016157929661579295, "loss": 1.3122, "step": 51000 },
    { "epoch": 1.37, "learning_rate": 0.00015826144658261446, "loss": 1.304, "step": 51500 },
    { "epoch": 1.38, "learning_rate": 0.00015494359654943596, "loss": 1.3086, "step": 52000 },
    { "epoch": 1.39, "learning_rate": 0.00015162574651625746, "loss": 1.2964, "step": 52500 },
    { "epoch": 1.41, "learning_rate": 0.00014830789648307896, "loss": 1.3073, "step": 53000 },
    { "epoch": 1.42, "learning_rate": 0.00014499004644990046, "loss": 1.2976, "step": 53500 },
    { "epoch": 1.43, "learning_rate": 0.00014167219641672197, "loss": 1.2988, "step": 54000 },
    { "epoch": 1.45, "learning_rate": 0.00013835434638354347, "loss": 1.3038, "step": 54500 },
    { "epoch": 1.46, "learning_rate": 0.00013503649635036497, "loss": 1.3085, "step": 55000 },
    { "epoch": 1.47, "learning_rate": 0.00013171864631718647, "loss": 1.2942, "step": 55500 },
    { "epoch": 1.49, "learning_rate": 0.00012840079628400797, "loss": 1.298, "step": 56000 },
    { "epoch": 1.5, "learning_rate": 0.00012508294625082948, "loss": 1.3083, "step": 56500 },
    { "epoch": 1.51, "learning_rate": 0.00012176509621765096, "loss": 1.3074, "step": 57000 },
    { "epoch": 1.53, "learning_rate": 0.00011844724618447247, "loss": 1.2952, "step": 57500 },
    { "epoch": 1.54, "learning_rate": 0.00011512939615129397, "loss": 1.2934, "step": 58000 },
    { "epoch": 1.55, "learning_rate": 0.00011181154611811546, "loss": 1.2949, "step": 58500 },
    { "epoch": 1.57, "learning_rate": 0.00010849369608493696, "loss": 1.2845, "step": 59000 },
    { "epoch": 1.58, "learning_rate": 0.00010517584605175846, "loss": 1.2978, "step": 59500 },
    { "epoch": 1.59, "learning_rate": 0.00010185799601857996, "loss": 1.279, "step": 60000 },
    { "epoch": 1.61, "learning_rate": 9.854014598540146e-05, "loss": 1.2962, "step": 60500 },
    { "epoch": 1.62, "learning_rate": 9.522229595222296e-05, "loss": 1.3042, "step": 61000 },
    { "epoch": 1.63, "learning_rate": 9.190444591904445e-05, "loss": 1.2836, "step": 61500 },
    { "epoch": 1.65, "learning_rate": 8.858659588586595e-05, "loss": 1.3003, "step": 62000 },
    { "epoch": 1.66, "learning_rate": 8.526874585268746e-05, "loss": 1.2998, "step": 62500 },
    { "epoch": 1.67, "learning_rate": 8.195089581950896e-05, "loss": 1.2924, "step": 63000 },
    { "epoch": 1.69, "learning_rate": 7.863304578633046e-05, "loss": 1.2915, "step": 63500 },
    { "epoch": 1.7, "learning_rate": 7.531519575315196e-05, "loss": 1.2986, "step": 64000 },
    { "epoch": 1.71, "learning_rate": 7.199734571997346e-05, "loss": 1.3072, "step": 64500 },
    { "epoch": 1.73, "learning_rate": 6.867949568679497e-05, "loss": 1.2975, "step": 65000 },
    { "epoch": 1.74, "learning_rate": 6.536164565361647e-05, "loss": 1.2987, "step": 65500 },
    { "epoch": 1.75, "learning_rate": 6.204379562043796e-05, "loss": 1.2907, "step": 66000 },
    { "epoch": 1.77, "learning_rate": 5.872594558725946e-05, "loss": 1.2932, "step": 66500 },
    { "epoch": 1.78, "learning_rate": 5.540809555408095e-05, "loss": 1.2993, "step": 67000 },
    { "epoch": 1.79, "learning_rate": 5.2090245520902455e-05, "loss": 1.2919, "step": 67500 },
    { "epoch": 1.8, "learning_rate": 4.877239548772396e-05, "loss": 1.2915, "step": 68000 },
    { "epoch": 1.82, "learning_rate": 4.545454545454546e-05, "loss": 1.2974, "step": 68500 },
    { "epoch": 1.83, "learning_rate": 4.213669542136696e-05, "loss": 1.2813, "step": 69000 },
    { "epoch": 1.84, "learning_rate": 3.881884538818845e-05, "loss": 1.2866, "step": 69500 },
    { "epoch": 1.86, "learning_rate": 3.550099535500995e-05, "loss": 1.2861, "step": 70000 },
    { "epoch": 1.87, "learning_rate": 3.218314532183145e-05, "loss": 1.2921, "step": 70500 },
    { "epoch": 1.88, "learning_rate": 2.886529528865295e-05, "loss": 1.2973, "step": 71000 },
    { "epoch": 1.9, "learning_rate": 2.5547445255474453e-05, "loss": 1.2901, "step": 71500 },
    { "epoch": 1.91, "learning_rate": 2.2229595222295955e-05, "loss": 1.291, "step": 72000 },
    { "epoch": 1.92, "learning_rate": 1.8911745189117453e-05, "loss": 1.2768, "step": 72500 },
    { "epoch": 1.94, "learning_rate": 1.559389515593895e-05, "loss": 1.2867, "step": 73000 },
    { "epoch": 1.95, "learning_rate": 1.227604512276045e-05, "loss": 1.292, "step": 73500 },
    { "epoch": 1.96, "learning_rate": 8.958195089581952e-06, "loss": 1.2956, "step": 74000 },
    { "epoch": 1.98, "learning_rate": 5.64034505640345e-06, "loss": 1.294, "step": 74500 },
    { "epoch": 1.99, "learning_rate": 2.32249502322495e-06, "loss": 1.2918, "step": 75000 }
  ],
| "logging_steps": 500, | |
| "max_steps": 75350, | |
| "num_train_epochs": 2, | |
| "save_steps": 5000, | |
| "total_flos": 0.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
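This is the trainer_state.json that the Hugging Face Trainer writes next to each checkpoint: each log_history entry is emitted every logging_steps (500) steps and records the epoch, the current learning rate, and the smoothed training loss, and the logged values are consistent with a linear decay from 5e-4 toward zero over max_steps (75,350). Below is a minimal sketch of how one might load and plot this log; the file path and the use of matplotlib are assumptions for illustration, not part of the state file itself.

```python
# Minimal sketch: plot training loss and learning rate against step from the
# log above. Assumes the JSON is saved as "trainer_state.json" in the current
# directory and that matplotlib is installed.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry holds "epoch", "learning_rate", "loss", and "step".
steps = [entry["step"] for entry in state["log_history"]]
losses = [entry["loss"] for entry in state["log_history"]]
lrs = [entry["learning_rate"] for entry in state["log_history"]]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(steps, losses)
ax1.set(xlabel="step", ylabel="training loss")
ax2.plot(steps, lrs)
ax2.set(xlabel="step", ylabel="learning rate")
fig.tight_layout()
plt.show()
```

The left panel should show the loss drifting from about 1.41 down to about 1.29 over the two epochs, and the right panel the linear learning-rate schedule.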