| { |
| "best_global_step": 750, |
| "best_metric": 23.863094670607303, |
  "best_model_checkpoint": "./whisper-small-dv/checkpoint-750",
| "epoch": 7.853403141361256, |
| "eval_steps": 250, |
| "global_step": 1500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.13089005235602094, |
| "grad_norm": 10.698588371276855, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 3.1116, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.2617801047120419, |
| "grad_norm": 9.169360160827637, |
| "learning_rate": 9.800000000000001e-06, |
| "loss": 1.6439, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.39267015706806285, |
| "grad_norm": 9.506369590759277, |
| "learning_rate": 1e-05, |
| "loss": 1.1823, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.5235602094240838, |
| "grad_norm": 8.656845092773438, |
| "learning_rate": 1e-05, |
| "loss": 0.9809, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6544502617801047, |
| "grad_norm": 7.963810443878174, |
| "learning_rate": 1e-05, |
| "loss": 0.8699, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.7853403141361257, |
| "grad_norm": 8.005380630493164, |
| "learning_rate": 1e-05, |
| "loss": 0.7183, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.9162303664921466, |
| "grad_norm": 6.437658309936523, |
| "learning_rate": 1e-05, |
| "loss": 0.5152, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.0471204188481675, |
| "grad_norm": 4.852717399597168, |
| "learning_rate": 1e-05, |
| "loss": 0.4517, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.1780104712041886, |
| "grad_norm": 4.497104167938232, |
| "learning_rate": 1e-05, |
| "loss": 0.3607, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.3089005235602094, |
| "grad_norm": 5.882933139801025, |
| "learning_rate": 1e-05, |
| "loss": 0.3435, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.3089005235602094, |
| "eval_loss": 0.4582085609436035, |
| "eval_runtime": 111.6293, |
| "eval_samples_per_second": 7.597, |
| "eval_steps_per_second": 0.475, |
| "eval_wer": 29.89798836876728, |
| "eval_wer_ortho": 36.224586737602124, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.4397905759162304, |
| "grad_norm": 5.737706184387207, |
| "learning_rate": 1e-05, |
| "loss": 0.338, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.5706806282722514, |
| "grad_norm": 4.902078151702881, |
| "learning_rate": 1e-05, |
| "loss": 0.3241, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.7015706806282722, |
| "grad_norm": 6.897104263305664, |
| "learning_rate": 1e-05, |
| "loss": 0.3376, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.8324607329842932, |
| "grad_norm": 5.536213397979736, |
| "learning_rate": 1e-05, |
| "loss": 0.3219, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.9633507853403143, |
| "grad_norm": 4.42805290222168, |
| "learning_rate": 1e-05, |
| "loss": 0.304, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.094240837696335, |
| "grad_norm": 3.3683159351348877, |
| "learning_rate": 1e-05, |
| "loss": 0.2182, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.225130890052356, |
| "grad_norm": 3.863335609436035, |
| "learning_rate": 1e-05, |
| "loss": 0.1761, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.356020942408377, |
| "grad_norm": 3.0521528720855713, |
| "learning_rate": 1e-05, |
| "loss": 0.2013, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.486910994764398, |
| "grad_norm": 3.18312931060791, |
| "learning_rate": 1e-05, |
| "loss": 0.1766, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.6178010471204187, |
| "grad_norm": 4.015468597412109, |
| "learning_rate": 1e-05, |
| "loss": 0.1883, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.6178010471204187, |
| "eval_loss": 0.40125927329063416, |
| "eval_runtime": 113.2585, |
| "eval_samples_per_second": 7.487, |
| "eval_steps_per_second": 0.468, |
| "eval_wer": 25.49337401086853, |
| "eval_wer_ortho": 31.702451073532206, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.7486910994764395, |
| "grad_norm": 4.0088043212890625, |
| "learning_rate": 1e-05, |
| "loss": 0.1923, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.8795811518324608, |
| "grad_norm": 5.095918655395508, |
| "learning_rate": 1e-05, |
| "loss": 0.1753, |
| "step": 550 |
| }, |
| { |
| "epoch": 3.0104712041884816, |
| "grad_norm": 2.5282716751098633, |
| "learning_rate": 1e-05, |
| "loss": 0.1686, |
| "step": 575 |
| }, |
| { |
| "epoch": 3.141361256544503, |
| "grad_norm": 3.1651320457458496, |
| "learning_rate": 1e-05, |
| "loss": 0.1041, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.2722513089005236, |
| "grad_norm": 3.057821750640869, |
| "learning_rate": 1e-05, |
| "loss": 0.0916, |
| "step": 625 |
| }, |
| { |
| "epoch": 3.4031413612565444, |
| "grad_norm": 2.9415502548217773, |
| "learning_rate": 1e-05, |
| "loss": 0.1017, |
| "step": 650 |
| }, |
| { |
| "epoch": 3.5340314136125652, |
| "grad_norm": 3.2176668643951416, |
| "learning_rate": 1e-05, |
| "loss": 0.0951, |
| "step": 675 |
| }, |
| { |
| "epoch": 3.6649214659685865, |
| "grad_norm": 3.7452964782714844, |
| "learning_rate": 1e-05, |
| "loss": 0.1, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.7958115183246073, |
| "grad_norm": 3.433546543121338, |
| "learning_rate": 1e-05, |
| "loss": 0.0931, |
| "step": 725 |
| }, |
| { |
| "epoch": 3.9267015706806285, |
| "grad_norm": 3.0128612518310547, |
| "learning_rate": 1e-05, |
| "loss": 0.0977, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.9267015706806285, |
| "eval_loss": 0.40142181515693665, |
| "eval_runtime": 111.1949, |
| "eval_samples_per_second": 7.626, |
| "eval_steps_per_second": 0.477, |
| "eval_wer": 23.863094670607303, |
| "eval_wer_ortho": 30.543416302489074, |
| "step": 750 |
| }, |
| { |
| "epoch": 4.057591623036649, |
| "grad_norm": 1.552932620048523, |
| "learning_rate": 1e-05, |
| "loss": 0.0771, |
| "step": 775 |
| }, |
| { |
| "epoch": 4.18848167539267, |
| "grad_norm": 2.078547954559326, |
| "learning_rate": 1e-05, |
| "loss": 0.0504, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.319371727748691, |
| "grad_norm": 1.9236699342727661, |
| "learning_rate": 1e-05, |
| "loss": 0.06, |
| "step": 825 |
| }, |
| { |
| "epoch": 4.450261780104712, |
| "grad_norm": 2.1147005558013916, |
| "learning_rate": 1e-05, |
| "loss": 0.0487, |
| "step": 850 |
| }, |
| { |
| "epoch": 4.581151832460733, |
| "grad_norm": 2.60475492477417, |
| "learning_rate": 1e-05, |
| "loss": 0.0503, |
| "step": 875 |
| }, |
| { |
| "epoch": 4.712041884816754, |
| "grad_norm": 1.980432391166687, |
| "learning_rate": 1e-05, |
| "loss": 0.0477, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.842931937172775, |
| "grad_norm": 2.8968000411987305, |
| "learning_rate": 1e-05, |
| "loss": 0.0542, |
| "step": 925 |
| }, |
| { |
| "epoch": 4.973821989528796, |
| "grad_norm": 3.0612237453460693, |
| "learning_rate": 1e-05, |
| "loss": 0.0544, |
| "step": 950 |
| }, |
| { |
| "epoch": 5.104712041884817, |
| "grad_norm": 1.3939883708953857, |
| "learning_rate": 1e-05, |
| "loss": 0.0381, |
| "step": 975 |
| }, |
| { |
| "epoch": 5.2356020942408374, |
| "grad_norm": 4.066138744354248, |
| "learning_rate": 1e-05, |
| "loss": 0.0267, |
| "step": 1000 |
| }, |
| { |
| "epoch": 5.2356020942408374, |
| "eval_loss": 0.45016494393348694, |
| "eval_runtime": 112.8165, |
| "eval_samples_per_second": 7.517, |
| "eval_steps_per_second": 0.47, |
| "eval_wer": 24.57812946896749, |
| "eval_wer_ortho": 31.008930267908035, |
| "step": 1000 |
| }, |
| { |
| "epoch": 5.366492146596858, |
| "grad_norm": 2.1948580741882324, |
| "learning_rate": 1e-05, |
| "loss": 0.0271, |
| "step": 1025 |
| }, |
| { |
| "epoch": 5.49738219895288, |
| "grad_norm": 1.8259961605072021, |
| "learning_rate": 1e-05, |
| "loss": 0.0304, |
| "step": 1050 |
| }, |
| { |
| "epoch": 5.628272251308901, |
| "grad_norm": 2.1298553943634033, |
| "learning_rate": 1e-05, |
| "loss": 0.0327, |
| "step": 1075 |
| }, |
| { |
| "epoch": 5.7591623036649215, |
| "grad_norm": 1.3052338361740112, |
| "learning_rate": 1e-05, |
| "loss": 0.03, |
| "step": 1100 |
| }, |
| { |
| "epoch": 5.890052356020942, |
| "grad_norm": 2.323640823364258, |
| "learning_rate": 1e-05, |
| "loss": 0.0297, |
| "step": 1125 |
| }, |
| { |
| "epoch": 6.020942408376963, |
| "grad_norm": 0.8321124315261841, |
| "learning_rate": 1e-05, |
| "loss": 0.0237, |
| "step": 1150 |
| }, |
| { |
| "epoch": 6.151832460732984, |
| "grad_norm": 1.0479042530059814, |
| "learning_rate": 1e-05, |
| "loss": 0.0161, |
| "step": 1175 |
| }, |
| { |
| "epoch": 6.282722513089006, |
| "grad_norm": 0.9512850642204285, |
| "learning_rate": 1e-05, |
| "loss": 0.0171, |
| "step": 1200 |
| }, |
| { |
| "epoch": 6.4136125654450264, |
| "grad_norm": 1.3394098281860352, |
| "learning_rate": 1e-05, |
| "loss": 0.018, |
| "step": 1225 |
| }, |
| { |
| "epoch": 6.544502617801047, |
| "grad_norm": 1.1322811841964722, |
| "learning_rate": 1e-05, |
| "loss": 0.0179, |
| "step": 1250 |
| }, |
| { |
| "epoch": 6.544502617801047, |
| "eval_loss": 0.46418532729148865, |
| "eval_runtime": 114.086, |
| "eval_samples_per_second": 7.433, |
| "eval_steps_per_second": 0.465, |
| "eval_wer": 24.149108589951375, |
| "eval_wer_ortho": 30.277408322249666, |
| "step": 1250 |
| }, |
| { |
| "epoch": 6.675392670157068, |
| "grad_norm": 0.8465819954872131, |
| "learning_rate": 1e-05, |
| "loss": 0.0195, |
| "step": 1275 |
| }, |
| { |
| "epoch": 6.806282722513089, |
| "grad_norm": 2.5051374435424805, |
| "learning_rate": 1e-05, |
| "loss": 0.0198, |
| "step": 1300 |
| }, |
| { |
| "epoch": 6.93717277486911, |
| "grad_norm": 2.2962050437927246, |
| "learning_rate": 1e-05, |
| "loss": 0.0175, |
| "step": 1325 |
| }, |
| { |
| "epoch": 7.0680628272251305, |
| "grad_norm": 1.9513806104660034, |
| "learning_rate": 1e-05, |
| "loss": 0.0171, |
| "step": 1350 |
| }, |
| { |
| "epoch": 7.198952879581152, |
| "grad_norm": 0.6999716758728027, |
| "learning_rate": 1e-05, |
| "loss": 0.0139, |
| "step": 1375 |
| }, |
| { |
| "epoch": 7.329842931937173, |
| "grad_norm": 1.1750918626785278, |
| "learning_rate": 1e-05, |
| "loss": 0.011, |
| "step": 1400 |
| }, |
| { |
| "epoch": 7.460732984293194, |
| "grad_norm": 1.7177971601486206, |
| "learning_rate": 1e-05, |
| "loss": 0.01, |
| "step": 1425 |
| }, |
| { |
| "epoch": 7.591623036649215, |
| "grad_norm": 2.9293274879455566, |
| "learning_rate": 1e-05, |
| "loss": 0.0127, |
| "step": 1450 |
| }, |
| { |
| "epoch": 7.722513089005235, |
| "grad_norm": 0.5329703092575073, |
| "learning_rate": 1e-05, |
| "loss": 0.0107, |
| "step": 1475 |
| }, |
| { |
| "epoch": 7.853403141361256, |
| "grad_norm": 2.054241418838501, |
| "learning_rate": 1e-05, |
| "loss": 0.0112, |
| "step": 1500 |
| }, |
| { |
| "epoch": 7.853403141361256, |
| "eval_loss": 0.4861427843570709, |
| "eval_runtime": 113.9681, |
| "eval_samples_per_second": 7.441, |
| "eval_steps_per_second": 0.465, |
| "eval_wer": 24.358852130803697, |
| "eval_wer_ortho": 30.372411172335166, |
| "step": 1500 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 2000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 11, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.91190892306432e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|