{
  "best_metric": 0.1309033051276488,
  "best_model_checkpoint": "/data/schen/xlsr_teochew_model_no_punctuation_pinyin_ES/checkpoint-3500",
  "epoch": 11.2,
  "eval_steps": 500,
  "global_step": 3500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16,
      "grad_norm": 1.2721055746078491,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 0.1692,
      "step": 50
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.4928969442844391,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 0.151,
      "step": 100
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.5016694068908691,
      "learning_rate": 8.999999999999999e-05,
      "loss": 0.1446,
      "step": 150
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.5080090165138245,
      "learning_rate": 0.00011999999999999999,
      "loss": 0.1408,
      "step": 200
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2801826000213623,
      "learning_rate": 0.0001494,
      "loss": 0.1475,
      "step": 250
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2742992639541626,
      "learning_rate": 0.00017939999999999997,
      "loss": 0.1384,
      "step": 300
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.6099441051483154,
      "learning_rate": 0.00020939999999999997,
      "loss": 0.1396,
      "step": 350
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.5118276476860046,
      "learning_rate": 0.0002394,
      "loss": 0.1394,
      "step": 400
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.40769001841545105,
      "learning_rate": 0.0002694,
      "loss": 0.1384,
      "step": 450
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.40534549951553345,
      "learning_rate": 0.00029939999999999996,
      "loss": 0.1496,
      "step": 500
    },
    {
      "epoch": 1.6,
      "eval_loss": 0.1971094310283661,
      "eval_runtime": 40.7703,
      "eval_samples_per_second": 30.66,
      "eval_steps_per_second": 3.851,
      "eval_wer": 0.15227453532942792,
      "step": 500
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.2814898192882538,
      "learning_rate": 0.000299046357615894,
      "loss": 0.1536,
      "step": 550
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.256887674331665,
      "learning_rate": 0.0002980529801324503,
      "loss": 0.1602,
      "step": 600
    },
    {
      "epoch": 2.08,
      "grad_norm": 0.2832282483577728,
      "learning_rate": 0.0002970596026490066,
      "loss": 0.1592,
      "step": 650
    },
    {
      "epoch": 2.24,
      "grad_norm": 0.3951769769191742,
      "learning_rate": 0.0002960662251655629,
      "loss": 0.1563,
      "step": 700
    },
    {
      "epoch": 2.4,
      "grad_norm": 0.4493905305862427,
      "learning_rate": 0.00029507284768211917,
      "loss": 0.1484,
      "step": 750
    },
    {
      "epoch": 2.56,
      "grad_norm": 0.2806813716888428,
      "learning_rate": 0.0002940794701986755,
      "loss": 0.1627,
      "step": 800
    },
    {
      "epoch": 2.7199999999999998,
      "grad_norm": 0.926182210445404,
      "learning_rate": 0.0002930860927152318,
      "loss": 0.1675,
      "step": 850
    },
    {
      "epoch": 2.88,
      "grad_norm": 0.36387988924980164,
      "learning_rate": 0.0002921125827814569,
      "loss": 0.153,
      "step": 900
    },
    {
      "epoch": 3.04,
      "grad_norm": 0.2582753002643585,
      "learning_rate": 0.00029111920529801324,
      "loss": 0.1675,
      "step": 950
    },
    {
      "epoch": 3.2,
      "grad_norm": 0.30846667289733887,
      "learning_rate": 0.0002901258278145695,
      "loss": 0.1606,
      "step": 1000
    },
    {
      "epoch": 3.2,
      "eval_loss": 0.19323976337909698,
      "eval_runtime": 40.6846,
      "eval_samples_per_second": 30.724,
      "eval_steps_per_second": 3.859,
      "eval_wer": 0.15527590945252043,
      "step": 1000
    },
    {
      "epoch": 3.36,
      "grad_norm": 0.35464128851890564,
      "learning_rate": 0.0002891324503311258,
      "loss": 0.1546,
      "step": 1050
    },
    {
      "epoch": 3.52,
      "grad_norm": 0.3184821307659149,
      "learning_rate": 0.00028813907284768207,
      "loss": 0.1532,
      "step": 1100
    },
    {
      "epoch": 3.68,
      "grad_norm": 0.3197060227394104,
      "learning_rate": 0.0002871456953642384,
      "loss": 0.1505,
      "step": 1150
    },
    {
      "epoch": 3.84,
      "grad_norm": 0.2760404050350189,
      "learning_rate": 0.0002861721854304636,
      "loss": 0.1623,
      "step": 1200
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.49457865953445435,
      "learning_rate": 0.00028517880794701986,
      "loss": 0.1666,
      "step": 1250
    },
    {
      "epoch": 4.16,
      "grad_norm": 1.0381335020065308,
      "learning_rate": 0.00028418543046357614,
      "loss": 0.1409,
      "step": 1300
    },
    {
      "epoch": 4.32,
      "grad_norm": 0.5828660130500793,
      "learning_rate": 0.0002831920529801324,
      "loss": 0.1432,
      "step": 1350
    },
    {
      "epoch": 4.48,
      "grad_norm": 0.9824939966201782,
      "learning_rate": 0.0002821986754966887,
      "loss": 0.1447,
      "step": 1400
    },
    {
      "epoch": 4.64,
      "grad_norm": 0.5372733473777771,
      "learning_rate": 0.00028120529801324497,
      "loss": 0.15,
      "step": 1450
    },
    {
      "epoch": 4.8,
      "grad_norm": 0.5010742545127869,
      "learning_rate": 0.0002802119205298013,
      "loss": 0.1512,
      "step": 1500
    },
    {
      "epoch": 4.8,
      "eval_loss": 0.19122301042079926,
      "eval_runtime": 40.5484,
      "eval_samples_per_second": 30.827,
      "eval_steps_per_second": 3.872,
      "eval_wer": 0.15097273450495408,
      "step": 1500
    },
    {
      "epoch": 4.96,
      "grad_norm": 0.6968886852264404,
      "learning_rate": 0.0002792185430463576,
      "loss": 0.1567,
      "step": 1550
    },
    {
      "epoch": 5.12,
      "grad_norm": 0.44052571058273315,
      "learning_rate": 0.00027824503311258276,
      "loss": 0.1469,
      "step": 1600
    },
    {
      "epoch": 5.28,
      "grad_norm": 0.278921514749527,
      "learning_rate": 0.00027727152317880794,
      "loss": 0.1497,
      "step": 1650
    },
    {
      "epoch": 5.44,
      "grad_norm": 0.380680650472641,
      "learning_rate": 0.0002762781456953642,
      "loss": 0.1449,
      "step": 1700
    },
    {
      "epoch": 5.6,
      "grad_norm": 0.42826640605926514,
      "learning_rate": 0.0002752847682119205,
      "loss": 0.1447,
      "step": 1750
    },
    {
      "epoch": 5.76,
      "grad_norm": 0.4069671928882599,
      "learning_rate": 0.0002742913907284768,
      "loss": 0.1491,
      "step": 1800
    },
    {
      "epoch": 5.92,
      "grad_norm": 0.3466169238090515,
      "learning_rate": 0.00027329801324503305,
      "loss": 0.1573,
      "step": 1850
    },
    {
      "epoch": 6.08,
      "grad_norm": 0.3684636652469635,
      "learning_rate": 0.0002723046357615894,
      "loss": 0.1392,
      "step": 1900
    },
    {
      "epoch": 6.24,
      "grad_norm": 0.4097164273262024,
      "learning_rate": 0.00027131125827814566,
      "loss": 0.1338,
      "step": 1950
    },
    {
      "epoch": 6.4,
      "grad_norm": 0.3014273941516876,
      "learning_rate": 0.00027033774834437084,
      "loss": 0.1333,
      "step": 2000
    },
    {
      "epoch": 6.4,
      "eval_loss": 0.19138558208942413,
      "eval_runtime": 41.0557,
      "eval_samples_per_second": 30.446,
      "eval_steps_per_second": 3.824,
      "eval_wer": 0.14066681131120273,
      "step": 2000
    },
    {
      "epoch": 6.5600000000000005,
      "grad_norm": 0.2529243528842926,
      "learning_rate": 0.0002693443708609271,
      "loss": 0.1367,
      "step": 2050
    },
    {
      "epoch": 6.72,
      "grad_norm": 0.23533473908901215,
      "learning_rate": 0.00026835099337748345,
      "loss": 0.1439,
      "step": 2100
    },
    {
      "epoch": 6.88,
      "grad_norm": 0.524277925491333,
      "learning_rate": 0.00026735761589403973,
      "loss": 0.1379,
      "step": 2150
    },
    {
      "epoch": 7.04,
      "grad_norm": 0.5774759650230408,
      "learning_rate": 0.000266364238410596,
      "loss": 0.1354,
      "step": 2200
    },
    {
      "epoch": 7.2,
      "grad_norm": 0.26473215222358704,
      "learning_rate": 0.00026539072847682113,
      "loss": 0.143,
      "step": 2250
    },
    {
      "epoch": 7.36,
      "grad_norm": 0.27728524804115295,
      "learning_rate": 0.00026439735099337747,
      "loss": 0.1315,
      "step": 2300
    },
    {
      "epoch": 7.52,
      "grad_norm": 0.7108523845672607,
      "learning_rate": 0.00026340397350993374,
      "loss": 0.1359,
      "step": 2350
    },
    {
      "epoch": 7.68,
      "grad_norm": 0.33975887298583984,
      "learning_rate": 0.0002624105960264901,
      "loss": 0.1337,
      "step": 2400
    },
    {
      "epoch": 7.84,
      "grad_norm": 0.3907933533191681,
      "learning_rate": 0.00026141721854304635,
      "loss": 0.126,
      "step": 2450
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.48056796193122864,
      "learning_rate": 0.00026042384105960263,
      "loss": 0.1342,
      "step": 2500
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.2016124725341797,
      "eval_runtime": 41.1292,
      "eval_samples_per_second": 30.392,
      "eval_steps_per_second": 3.817,
      "eval_wer": 0.14721197656758517,
      "step": 2500
    },
    {
      "epoch": 8.16,
      "grad_norm": 0.7174838185310364,
      "learning_rate": 0.0002594304635761589,
      "loss": 0.1325,
      "step": 2550
    },
    {
      "epoch": 8.32,
      "grad_norm": 0.7047253251075745,
      "learning_rate": 0.0002584370860927152,
      "loss": 0.1279,
      "step": 2600
    },
    {
      "epoch": 8.48,
      "grad_norm": 0.7922475934028625,
      "learning_rate": 0.00025746357615894037,
      "loss": 0.1304,
      "step": 2650
    },
    {
      "epoch": 8.64,
      "grad_norm": 0.5174902081489563,
      "learning_rate": 0.00025647019867549664,
      "loss": 0.1272,
      "step": 2700
    },
    {
      "epoch": 8.8,
      "grad_norm": 1.5003706216812134,
      "learning_rate": 0.000255476821192053,
      "loss": 0.1327,
      "step": 2750
    },
    {
      "epoch": 8.96,
      "grad_norm": 0.7675436735153198,
      "learning_rate": 0.00025448344370860925,
      "loss": 0.1305,
      "step": 2800
    },
    {
      "epoch": 9.12,
      "grad_norm": 0.3741365373134613,
      "learning_rate": 0.00025349006622516553,
      "loss": 0.1118,
      "step": 2850
    },
    {
      "epoch": 9.28,
      "grad_norm": 0.39765921235084534,
      "learning_rate": 0.00025249668874172186,
      "loss": 0.1259,
      "step": 2900
    },
    {
      "epoch": 9.44,
      "grad_norm": 0.32159069180488586,
      "learning_rate": 0.00025150331125827814,
      "loss": 0.1302,
      "step": 2950
    },
    {
      "epoch": 9.6,
      "grad_norm": 0.3576621115207672,
      "learning_rate": 0.00025052980132450327,
      "loss": 0.1252,
      "step": 3000
    },
    {
      "epoch": 9.6,
      "eval_loss": 0.1925106644630432,
      "eval_runtime": 41.148,
      "eval_samples_per_second": 30.378,
      "eval_steps_per_second": 3.815,
      "eval_wer": 0.13513415780718882,
      "step": 3000
    },
    {
      "epoch": 9.76,
      "grad_norm": 0.43902838230133057,
      "learning_rate": 0.0002495364238410596,
      "loss": 0.126,
      "step": 3050
    },
    {
      "epoch": 9.92,
      "grad_norm": 0.49293768405914307,
      "learning_rate": 0.0002485430463576159,
      "loss": 0.1406,
      "step": 3100
    },
    {
      "epoch": 10.08,
      "grad_norm": 0.2280844897031784,
      "learning_rate": 0.00024754966887417215,
      "loss": 0.1191,
      "step": 3150
    },
    {
      "epoch": 10.24,
      "grad_norm": 0.2204989492893219,
      "learning_rate": 0.00024655629139072843,
      "loss": 0.1162,
      "step": 3200
    },
    {
      "epoch": 10.4,
      "grad_norm": 0.8076439499855042,
      "learning_rate": 0.00024556291390728476,
      "loss": 0.1197,
      "step": 3250
    },
    {
      "epoch": 10.56,
      "grad_norm": 0.22111420333385468,
      "learning_rate": 0.00024456953642384104,
      "loss": 0.1156,
      "step": 3300
    },
    {
      "epoch": 10.72,
      "grad_norm": 0.4509029686450958,
      "learning_rate": 0.00024357615894039732,
      "loss": 0.1181,
      "step": 3350
    },
    {
      "epoch": 10.88,
      "grad_norm": 0.5422528386116028,
      "learning_rate": 0.00024258278145695365,
      "loss": 0.1263,
      "step": 3400
    },
    {
      "epoch": 11.04,
      "grad_norm": 0.21502262353897095,
      "learning_rate": 0.00024160927152317878,
      "loss": 0.1157,
      "step": 3450
    },
    {
      "epoch": 11.2,
      "grad_norm": 0.22735533118247986,
      "learning_rate": 0.00024061589403973508,
      "loss": 0.111,
      "step": 3500
    },
    {
      "epoch": 11.2,
      "eval_loss": 0.18385492265224457,
      "eval_runtime": 41.1887,
      "eval_samples_per_second": 30.348,
      "eval_steps_per_second": 3.812,
      "eval_wer": 0.1309033051276488,
      "step": 3500
    }
  ],
  "logging_steps": 50,
  "max_steps": 15600,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "total_flos": 1.880734589144977e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|