Training in progress, step 100, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/config.json +1 -1
last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +23 -582
last-checkpoint/training_args.bin +1 -1

last-checkpoint/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/content/YeBhoneLin-Whiper-Small-Stream-2.0/last-checkpoint",
   "activation_dropout": 0.0,
   "activation_function": "gelu",
   "apply_spec_augment": false,

 {
+  "_name_or_path": "openai/whisper-small",
   "activation_dropout": 0.0,
   "activation_function": "gelu",
   "apply_spec_augment": false,

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c221c59c353a7c481ef8fb848e7a2c3921f470fcb417beec2b5bef362aa3bd5b
 size 966995080

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d18664503eeefe73d902122981c82e338589f72a78e1805df1a86211ffccefc
 size 966995080

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa70d6d073579eac4a8bf8a9837094800d8adea492cb54aab25e06e08ad6d60b
 size 1925064044

 version https://git-lfs.github.com/spec/v1
+oid sha256:814aee5aaf254d346a1bf9a7a5da84ade3549eadd13d79d058bea07adbd2e108
 size 1925064044

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d71c2c7ed0cfd7a1644a65fd7297e581eb7c34b11ce37eddc62f761ad914001e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad46ce4e5894519be6dee3b5c4bef67745e115bf01464ced9f219f603b4ee39e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd1612242bb0477b7b98b59bf0fa7f281d428f72225aa5cd84f5ffc18b7c2bcc
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0d35a84a90afac6ec40a34ed4fb70cbf6f66e82eb3de0a29476df83e41dc581f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,615 +1,56 @@
 {
-  "best_metric": 31.710700132100396,
-  "best_model_checkpoint": "./whisper-small-lt/checkpoint-2000",
-  "epoch": 5.0275,
-  "eval_steps": 500,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.0125,
-      "grad_norm": 1.318737506866455,
       "learning_rate": 5.000000000000001e-07,
-      "loss": 0.023,
       "step": 25
     },
     {
       "epoch": 0.025,
-      "grad_norm": 0.8892257213592529,
       "learning_rate": 1.0000000000000002e-06,
-      "loss": 0.0129,
       "step": 50
     },
     {
       "epoch": 0.0375,
-      "grad_norm": 1.2649672031402588,
       "learning_rate": 1.5e-06,
-      "loss": 0.019,
       "step": 75
     },
     {
       "epoch": 0.05,
-      "grad_norm": 0.9079497456550598,
       "learning_rate": 2.0000000000000003e-06,
-      "loss": 0.0201,
       "step": 100
     },
     {
-      "epoch": 0.0625,
-      "grad_norm": 2.3915770053863525,
-      "learning_rate": 2.5e-06,
-      "loss": 0.0192,
-      "step": 125
-    },
-    {
-      "epoch": 0.075,
-      "grad_norm": 1.043415904045105,
-      "learning_rate": 3e-06,
-      "loss": 0.0183,
-      "step": 150
-    },
-    {
-      "epoch": 0.0875,
-      "grad_norm": 1.3370391130447388,
-      "learning_rate": 3.5e-06,
-      "loss": 0.0153,
-      "step": 175
-    },
-    {
-      "epoch": 0.1,
-      "grad_norm": 1.7550990581512451,
-      "learning_rate": 4.000000000000001e-06,
-      "loss": 0.0196,
-      "step": 200
-    },
-    {
-      "epoch": 0.1125,
-      "grad_norm": 3.453934669494629,
-      "learning_rate": 4.5e-06,
-      "loss": 0.0215,
-      "step": 225
-    },
-    {
-      "epoch": 0.125,
-      "grad_norm": 2.3508036136627197,
-      "learning_rate": 5e-06,
-      "loss": 0.0214,
-      "step": 250
-    },
-    {
-      "epoch": 0.1375,
-      "grad_norm": 1.381133794784546,
-      "learning_rate": 5.500000000000001e-06,
-      "loss": 0.0253,
-      "step": 275
-    },
-    {
-      "epoch": 0.15,
-      "grad_norm": 1.2868778705596924,
-      "learning_rate": 6e-06,
-      "loss": 0.0168,
-      "step": 300
-    },
-    {
-      "epoch": 0.1625,
-      "grad_norm": 1.7141369581222534,
-      "learning_rate": 6.5000000000000004e-06,
-      "loss": 0.0182,
-      "step": 325
-    },
-    {
-      "epoch": 0.175,
-      "grad_norm": 2.0160534381866455,
-      "learning_rate": 7e-06,
-      "loss": 0.0197,
-      "step": 350
-    },
-    {
-      "epoch": 0.1875,
-      "grad_norm": 1.8141695261001587,
-      "learning_rate": 7.500000000000001e-06,
-      "loss": 0.0157,
-      "step": 375
-    },
-    {
-      "epoch": 1.0055,
-      "grad_norm": 2.000669002532959,
-      "learning_rate": 8.000000000000001e-06,
-      "loss": 0.0235,
-      "step": 400
-    },
-    {
-      "epoch": 1.018,
-      "grad_norm": 0.7991472482681274,
-      "learning_rate": 8.5e-06,
-      "loss": 0.0186,
-      "step": 425
-    },
-    {
-      "epoch": 1.0305,
-      "grad_norm": 1.885020136833191,
-      "learning_rate": 9e-06,
-      "loss": 0.0188,
-      "step": 450
-    },
-    {
-      "epoch": 1.043,
-      "grad_norm": 1.6318445205688477,
-      "learning_rate": 9.5e-06,
-      "loss": 0.0196,
-      "step": 475
-    },
-    {
-      "epoch": 1.0555,
-      "grad_norm": 1.235178828239441,
-      "learning_rate": 1e-05,
-      "loss": 0.0183,
-      "step": 500
-    },
-    {
-      "epoch": 1.0555,
-      "eval_loss": 0.16427084803581238,
-      "eval_runtime": 628.8932,
-      "eval_samples_per_second": 1.013,
-      "eval_steps_per_second": 0.127,
-      "eval_wer": 36.38044914134743,
-      "step": 500
-    },
-    {
-      "epoch": 1.068,
-      "grad_norm": 1.9209116697311401,
-      "learning_rate": 9.833333333333333e-06,
-      "loss": 0.0171,
-      "step": 525
-    },
-    {
-      "epoch": 1.0805,
-      "grad_norm": 1.8610273599624634,
-      "learning_rate": 9.666666666666667e-06,
-      "loss": 0.0159,
-      "step": 550
-    },
-    {
-      "epoch": 1.093,
-      "grad_norm": 1.6190500259399414,
-      "learning_rate": 9.5e-06,
-      "loss": 0.0138,
-      "step": 575
-    },
-    {
-      "epoch": 1.1055,
-      "grad_norm": 1.2552204132080078,
-      "learning_rate": 9.333333333333334e-06,
-      "loss": 0.0178,
-      "step": 600
-    },
-    {
-      "epoch": 1.1179999999999999,
-      "grad_norm": 1.5186320543289185,
-      "learning_rate": 9.166666666666666e-06,
-      "loss": 0.0165,
-      "step": 625
-    },
-    {
-      "epoch": 1.1305,
-      "grad_norm": 2.233752727508545,
-      "learning_rate": 9e-06,
-      "loss": 0.0155,
-      "step": 650
-    },
-    {
-      "epoch": 1.143,
-      "grad_norm": 1.0048011541366577,
-      "learning_rate": 8.833333333333334e-06,
-      "loss": 0.0165,
-      "step": 675
-    },
-    {
-      "epoch": 1.1555,
-      "grad_norm": 2.4041740894317627,
-      "learning_rate": 8.666666666666668e-06,
-      "loss": 0.0111,
-      "step": 700
-    },
-    {
-      "epoch": 1.168,
-      "grad_norm": 2.370142936706543,
-      "learning_rate": 8.506666666666668e-06,
-      "loss": 0.0135,
-      "step": 725
-    },
-    {
-      "epoch": 1.1804999999999999,
-      "grad_norm": 1.4969311952590942,
-      "learning_rate": 8.34e-06,
-      "loss": 0.0155,
-      "step": 750
-    },
-    {
-      "epoch": 1.193,
-      "grad_norm": 2.3001623153686523,
-      "learning_rate": 8.173333333333334e-06,
-      "loss": 0.0179,
-      "step": 775
-    },
-    {
-      "epoch": 2.011,
-      "grad_norm": 1.519529938697815,
-      "learning_rate": 8.006666666666667e-06,
-      "loss": 0.0212,
-      "step": 800
-    },
-    {
-      "epoch": 2.0235,
-      "grad_norm": 2.144901752471924,
-      "learning_rate": 7.840000000000001e-06,
-      "loss": 0.0136,
-      "step": 825
-    },
-    {
-      "epoch": 2.036,
-      "grad_norm": 1.2353721857070923,
-      "learning_rate": 7.673333333333333e-06,
-      "loss": 0.0149,
-      "step": 850
-    },
-    {
-      "epoch": 2.0485,
-      "grad_norm": 1.0761120319366455,
-      "learning_rate": 7.506666666666668e-06,
-      "loss": 0.0159,
-      "step": 875
-    },
-    {
-      "epoch": 2.061,
-      "grad_norm": 1.4343351125717163,
-      "learning_rate": 7.340000000000001e-06,
-      "loss": 0.0117,
-      "step": 900
-    },
-    {
-      "epoch": 2.0735,
-      "grad_norm": 0.9911390542984009,
-      "learning_rate": 7.173333333333335e-06,
-      "loss": 0.0132,
-      "step": 925
-    },
-    {
-      "epoch": 2.086,
-      "grad_norm": 1.0542834997177124,
-      "learning_rate": 7.006666666666667e-06,
-      "loss": 0.0091,
-      "step": 950
-    },
-    {
-      "epoch": 2.0985,
-      "grad_norm": 1.383226990699768,
-      "learning_rate": 6.8400000000000014e-06,
-      "loss": 0.0108,
-      "step": 975
-    },
-    {
-      "epoch": 2.111,
-      "grad_norm": 1.7129534482955933,
-      "learning_rate": 6.6733333333333335e-06,
-      "loss": 0.0107,
-      "step": 1000
-    },
-    {
-      "epoch": 2.111,
-      "eval_loss": 0.17394371330738068,
-      "eval_runtime": 633.8826,
-      "eval_samples_per_second": 1.005,
-      "eval_steps_per_second": 0.126,
-      "eval_wer": 35.19154557463672,
-      "step": 1000
-    },
-    {
-      "epoch": 2.1235,
-      "grad_norm": 1.8474119901657104,
-      "learning_rate": 6.5066666666666665e-06,
-      "loss": 0.011,
-      "step": 1025
-    },
-    {
-      "epoch": 2.136,
-      "grad_norm": 1.9425179958343506,
-      "learning_rate": 6.34e-06,
-      "loss": 0.0134,
-      "step": 1050
-    },
-    {
-      "epoch": 2.1485,
-      "grad_norm": 1.1808143854141235,
-      "learning_rate": 6.173333333333333e-06,
-      "loss": 0.0088,
-      "step": 1075
-    },
-    {
-      "epoch": 2.161,
-      "grad_norm": 1.567855715751648,
-      "learning_rate": 6.006666666666667e-06,
-      "loss": 0.0108,
-      "step": 1100
-    },
-    {
-      "epoch": 2.1734999999999998,
-      "grad_norm": 2.328662395477295,
-      "learning_rate": 5.84e-06,
-      "loss": 0.0129,
-      "step": 1125
-    },
-    {
-      "epoch": 2.186,
-      "grad_norm": 1.0424344539642334,
-      "learning_rate": 5.673333333333334e-06,
-      "loss": 0.0116,
-      "step": 1150
-    },
-    {
-      "epoch": 3.004,
-      "grad_norm": 1.357283592224121,
-      "learning_rate": 5.506666666666667e-06,
-      "loss": 0.014,
-      "step": 1175
-    },
-    {
-      "epoch": 3.0165,
-      "grad_norm": 1.666524887084961,
-      "learning_rate": 5.3400000000000005e-06,
-      "loss": 0.0111,
-      "step": 1200
-    },
-    {
-      "epoch": 3.029,
-      "grad_norm": 1.2688168287277222,
-      "learning_rate": 5.1733333333333335e-06,
-      "loss": 0.0084,
-      "step": 1225
-    },
-    {
-      "epoch": 3.0415,
-      "grad_norm": 1.1632572412490845,
-      "learning_rate": 5.006666666666667e-06,
-      "loss": 0.0078,
-      "step": 1250
-    },
-    {
-      "epoch": 3.054,
-      "grad_norm": 1.4629402160644531,
-      "learning_rate": 4.84e-06,
-      "loss": 0.0059,
-      "step": 1275
-    },
-    {
-      "epoch": 3.0665,
-      "grad_norm": 0.6573676466941833,
-      "learning_rate": 4.673333333333333e-06,
-      "loss": 0.0054,
-      "step": 1300
-    },
-    {
-      "epoch": 3.079,
-      "grad_norm": 0.9792174100875854,
-      "learning_rate": 4.506666666666667e-06,
-      "loss": 0.0053,
-      "step": 1325
-    },
-    {
-      "epoch": 3.0915,
-      "grad_norm": 0.8290985226631165,
-      "learning_rate": 4.34e-06,
-      "loss": 0.0042,
-      "step": 1350
-    },
-    {
-      "epoch": 3.104,
-      "grad_norm": 1.0542296171188354,
-      "learning_rate": 4.173333333333334e-06,
-      "loss": 0.0058,
-      "step": 1375
-    },
-    {
-      "epoch": 3.1165,
-      "grad_norm": 0.30175602436065674,
-      "learning_rate": 4.006666666666667e-06,
-      "loss": 0.0073,
-      "step": 1400
-    },
-    {
-      "epoch": 3.129,
-      "grad_norm": 1.601818561553955,
-      "learning_rate": 3.8400000000000005e-06,
-      "loss": 0.0066,
-      "step": 1425
-    },
-    {
-      "epoch": 3.1415,
-      "grad_norm": 1.0414142608642578,
-      "learning_rate": 3.673333333333334e-06,
-      "loss": 0.0082,
-      "step": 1450
-    },
-    {
-      "epoch": 3.154,
-      "grad_norm": 0.8256711959838867,
-      "learning_rate": 3.5066666666666673e-06,
-      "loss": 0.0052,
-      "step": 1475
-    },
-    {
-      "epoch": 3.1665,
-      "grad_norm": 1.2712088823318481,
-      "learning_rate": 3.3400000000000006e-06,
-      "loss": 0.0053,
-      "step": 1500
-    },
-    {
-      "epoch": 3.1665,
-      "eval_loss": 0.19207946956157684,
-      "eval_runtime": 631.9168,
-      "eval_samples_per_second": 1.008,
-      "eval_steps_per_second": 0.127,
-      "eval_wer": 35.165125495376486,
-      "step": 1500
-    },
-    {
-      "epoch": 3.179,
-      "grad_norm": 1.0322113037109375,
-      "learning_rate": 3.173333333333334e-06,
-      "loss": 0.0059,
-      "step": 1525
-    },
-    {
-      "epoch": 3.1915,
-      "grad_norm": 1.2165710926055908,
-      "learning_rate": 3.0066666666666674e-06,
-      "loss": 0.0047,
-      "step": 1550
-    },
-    {
-      "epoch": 4.0095,
-      "grad_norm": 1.093904972076416,
-      "learning_rate": 2.84e-06,
-      "loss": 0.0061,
-      "step": 1575
-    },
-    {
-      "epoch": 4.022,
-      "grad_norm": 0.7058098912239075,
-      "learning_rate": 2.6733333333333333e-06,
-      "loss": 0.0046,
-      "step": 1600
-    },
-    {
-      "epoch": 4.0345,
-      "grad_norm": 0.35674819350242615,
-      "learning_rate": 2.5066666666666667e-06,
-      "loss": 0.0035,
-      "step": 1625
-    },
-    {
-      "epoch": 4.047,
-      "grad_norm": 0.7753161191940308,
-      "learning_rate": 2.3400000000000005e-06,
-      "loss": 0.0023,
-      "step": 1650
-    },
-    {
-      "epoch": 4.0595,
-      "grad_norm": 0.5088989734649658,
-      "learning_rate": 2.1733333333333334e-06,
-      "loss": 0.002,
-      "step": 1675
-    },
-    {
-      "epoch": 4.072,
-      "grad_norm": 1.716977596282959,
-      "learning_rate": 2.006666666666667e-06,
-      "loss": 0.0025,
-      "step": 1700
-    },
-    {
-      "epoch": 4.0845,
-      "grad_norm": 0.2785784900188446,
-      "learning_rate": 1.8400000000000002e-06,
-      "loss": 0.0017,
-      "step": 1725
-    },
-    {
-      "epoch": 4.097,
-      "grad_norm": 0.34034302830696106,
-      "learning_rate": 1.6733333333333335e-06,
-      "loss": 0.0018,
-      "step": 1750
-    },
-    {
-      "epoch": 4.1095,
-      "grad_norm": 0.626815140247345,
-      "learning_rate": 1.506666666666667e-06,
-      "loss": 0.0026,
-      "step": 1775
-    },
-    {
-      "epoch": 4.122,
-      "grad_norm": 0.41825541853904724,
-      "learning_rate": 1.34e-06,
-      "loss": 0.0025,
-      "step": 1800
-    },
-    {
-      "epoch": 4.1345,
-      "grad_norm": 0.6465583443641663,
-      "learning_rate": 1.1733333333333335e-06,
-      "loss": 0.0033,
-      "step": 1825
-    },
-    {
-      "epoch": 4.147,
-      "grad_norm": 0.3226906359195709,
-      "learning_rate": 1.0066666666666668e-06,
-      "loss": 0.0024,
-      "step": 1850
-    },
-    {
-      "epoch": 4.1595,
-      "grad_norm": 0.4798910915851593,
-      "learning_rate": 8.400000000000001e-07,
-      "loss": 0.002,
-      "step": 1875
-    },
-    {
-      "epoch": 4.172,
-      "grad_norm": 0.41319090127944946,
-      "learning_rate": 6.733333333333334e-07,
-      "loss": 0.0019,
-      "step": 1900
-    },
-    {
-      "epoch": 4.1845,
-      "grad_norm": 0.6397626996040344,
-      "learning_rate": 5.066666666666667e-07,
-      "loss": 0.0016,
-      "step": 1925
-    },
-    {
-      "epoch": 5.0025,
-      "grad_norm": 0.5262264013290405,
-      "learning_rate": 3.4000000000000003e-07,
-      "loss": 0.0017,
-      "step": 1950
-    },
-    {
-      "epoch": 5.015,
-      "grad_norm": 0.509278416633606,
-      "learning_rate": 1.7333333333333335e-07,
-      "loss": 0.0037,
-      "step": 1975
-    },
-    {
-      "epoch": 5.0275,
-      "grad_norm": 0.8492897748947144,
-      "learning_rate": 6.666666666666667e-09,
-      "loss": 0.0022,
-      "step": 2000
-    },
-    {
-      "epoch": 5.0275,
-      "eval_loss": 0.1655821055173874,
-      "eval_runtime": 636.0021,
-      "eval_samples_per_second": 1.002,
-      "eval_steps_per_second": 0.126,
-      "eval_wer": 31.710700132100396,
-      "step": 2000
     }
   ],
   "logging_steps": 25,
   "max_steps": 2000,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 9223372036854775807,
-  "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
@@ -617,12 +58,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.2217464672256e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 34.02245706737121,
+  "best_model_checkpoint": "./whisper-small-lt/checkpoint-100",
+  "epoch": 0.05,
+  "eval_steps": 100,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.0125,
+      "grad_norm": 2.394010305404663,
       "learning_rate": 5.000000000000001e-07,
+      "loss": 0.1152,
       "step": 25
     },
     {
       "epoch": 0.025,
+      "grad_norm": 1.771360158920288,
       "learning_rate": 1.0000000000000002e-06,
+      "loss": 0.0713,
       "step": 50
     },
     {
       "epoch": 0.0375,
+      "grad_norm": 2.0305898189544678,
       "learning_rate": 1.5e-06,
+      "loss": 0.056,
       "step": 75
     },
     {
       "epoch": 0.05,
+      "grad_norm": 2.1570658683776855,
       "learning_rate": 2.0000000000000003e-06,
+      "loss": 0.0489,
       "step": 100
     },
     {
+      "epoch": 0.05,
+      "eval_loss": 0.1409192979335785,
+      "eval_runtime": 624.8165,
+      "eval_samples_per_second": 1.019,
+      "eval_steps_per_second": 0.128,
+      "eval_wer": 34.02245706737121,
+      "step": 100
     }
   ],
   "logging_steps": 25,
   "max_steps": 2000,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 9223372036854775807,
+  "save_steps": 100,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.61736640512e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e34c60b814e2c49bccf2034b010f909e8e6f936f8fc498b53be4bfd23aeeae6
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:c315f0b3f3b9c04dd0f73fe7a958b5781a698b4bc9f0eb0dbcedca8beb018820
 size 5304