Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

config.json +48 -0
model.safetensors +3 -0
optimizer.pt +3 -0
preprocessor_config.json +15 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
trainer_state.json +925 -0
training_args.bin +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "apply_spec_augment": false,
+  "architectures": [
+    "WhisperForAudioClassification"
+  ],
+  "attention_dropout": 0.0,
+  "begin_suppress_tokens": [
+    220,
+    50256
+  ],
+  "bos_token_id": 50257,
+  "classifier_proj_size": 256,
+  "d_model": 1280,
+  "decoder_attention_heads": 20,
+  "decoder_ffn_dim": 5120,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 4,
+  "decoder_start_token_id": 50258,
+  "dropout": 0.0,
+  "dtype": "float32",
+  "encoder_attention_heads": 20,
+  "encoder_ffn_dim": 5120,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 32,
+  "eos_token_id": 50257,
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_source_positions": 50,
+  "max_target_positions": 448,
+  "median_filter_width": 7,
+  "model_type": "whisper",
+  "num_hidden_layers": 32,
+  "num_mel_bins": 128,
+  "pad_token_id": 50257,
+  "scale_embedding": false,
+  "transformers_version": "4.56.1",
+  "use_cache": true,
+  "use_weighted_layer_sum": false,
+  "vocab_size": 51866
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:80182668b69c07285b58c247dc7247b0b70d3d0b054d62e2f5b8e9332de2f17a
+size 2541820056

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e72b390fa766db8618958a089a79768381a2c347dfa69080e9aa675d025475a7
+size 5083449089

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "chunk_length": 30,
+  "dither": 0.0,
+  "feature_extractor_type": "WhisperFeatureExtractor",
+  "feature_size": 128,
+  "hop_length": 160,
+  "n_fft": 400,
+  "n_samples": 480000,
+  "nb_max_frames": 3000,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "WhisperProcessor",
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a5e082f7d3fac8cb998b4b273337660d6eaa6dfa71e0895b57ae6a9fa154c16
+size 14645

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2812a8fabdb289db1d8c450993f518b5a176efad8bbe135c89aaa82952a3b121
+size 1465

trainer_state.json ADDED Viewed

	@@ -0,0 +1,925 @@

+{
+  "best_global_step": 1170,
+  "best_metric": 0.9991928974979822,
+  "best_model_checkpoint": "wav2vec2_frog_classifier_sew_d/checkpoint-1170",
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 1170,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05128205128205128,
+      "grad_norm": 10.577055931091309,
+      "learning_rate": 1.9230769230769234e-07,
+      "loss": 0.6805,
+      "step": 10
+    },
+    {
+      "epoch": 0.10256410256410256,
+      "grad_norm": 42.849281311035156,
+      "learning_rate": 4.05982905982906e-07,
+      "loss": 0.6605,
+      "step": 20
+    },
+    {
+      "epoch": 0.15384615384615385,
+      "grad_norm": 22.91461753845215,
+      "learning_rate": 6.196581196581197e-07,
+      "loss": 0.6164,
+      "step": 30
+    },
+    {
+      "epoch": 0.20512820512820512,
+      "grad_norm": 35.23799514770508,
+      "learning_rate": 8.333333333333333e-07,
+      "loss": 0.5014,
+      "step": 40
+    },
+    {
+      "epoch": 0.2564102564102564,
+      "grad_norm": 8.47977352142334,
+      "learning_rate": 1.047008547008547e-06,
+      "loss": 0.3743,
+      "step": 50
+    },
+    {
+      "epoch": 0.3076923076923077,
+      "grad_norm": 160.9004669189453,
+      "learning_rate": 1.2606837606837608e-06,
+      "loss": 0.2865,
+      "step": 60
+    },
+    {
+      "epoch": 0.358974358974359,
+      "grad_norm": 5.323972702026367,
+      "learning_rate": 1.4743589743589745e-06,
+      "loss": 0.1922,
+      "step": 70
+    },
+    {
+      "epoch": 0.41025641025641024,
+      "grad_norm": 246.73423767089844,
+      "learning_rate": 1.6880341880341883e-06,
+      "loss": 0.2413,
+      "step": 80
+    },
+    {
+      "epoch": 0.46153846153846156,
+      "grad_norm": 5.069357395172119,
+      "learning_rate": 1.9017094017094018e-06,
+      "loss": 0.1617,
+      "step": 90
+    },
+    {
+      "epoch": 0.5128205128205128,
+      "grad_norm": 2.5714402198791504,
+      "learning_rate": 2.1153846153846155e-06,
+      "loss": 0.1292,
+      "step": 100
+    },
+    {
+      "epoch": 0.5641025641025641,
+      "grad_norm": 284.4996643066406,
+      "learning_rate": 2.3290598290598295e-06,
+      "loss": 0.0801,
+      "step": 110
+    },
+    {
+      "epoch": 0.6153846153846154,
+      "grad_norm": 112.67489624023438,
+      "learning_rate": 2.542735042735043e-06,
+      "loss": 0.2759,
+      "step": 120
+    },
+    {
+      "epoch": 0.6666666666666666,
+      "grad_norm": 0.44987669587135315,
+      "learning_rate": 2.756410256410257e-06,
+      "loss": 0.0573,
+      "step": 130
+    },
+    {
+      "epoch": 0.717948717948718,
+      "grad_norm": 25.903047561645508,
+      "learning_rate": 2.9700854700854705e-06,
+      "loss": 0.1032,
+      "step": 140
+    },
+    {
+      "epoch": 0.7692307692307693,
+      "grad_norm": 0.33153071999549866,
+      "learning_rate": 3.183760683760684e-06,
+      "loss": 0.0777,
+      "step": 150
+    },
+    {
+      "epoch": 0.8205128205128205,
+      "grad_norm": 0.2545352876186371,
+      "learning_rate": 3.397435897435898e-06,
+      "loss": 0.1243,
+      "step": 160
+    },
+    {
+      "epoch": 0.8717948717948718,
+      "grad_norm": 0.19522325694561005,
+      "learning_rate": 3.6111111111111115e-06,
+      "loss": 0.0335,
+      "step": 170
+    },
+    {
+      "epoch": 0.9230769230769231,
+      "grad_norm": 0.17608708143234253,
+      "learning_rate": 3.8247863247863246e-06,
+      "loss": 0.0087,
+      "step": 180
+    },
+    {
+      "epoch": 0.9743589743589743,
+      "grad_norm": 12.398482322692871,
+      "learning_rate": 4.0384615384615385e-06,
+      "loss": 0.065,
+      "step": 190
+    },
+    {
+      "epoch": 1.0,
+      "eval_f1": 0.9951768488745981,
+      "eval_fbeta": 0.9923051464968622,
+      "eval_loss": 0.024110933765769005,
+      "eval_precision": 0.9904,
+      "eval_recall": 1.0,
+      "eval_runtime": 19.5057,
+      "eval_samples_per_second": 68.595,
+      "eval_steps_per_second": 8.613,
+      "step": 195
+    },
+    {
+      "epoch": 1.0256410256410255,
+      "grad_norm": 388.4844055175781,
+      "learning_rate": 4.2521367521367524e-06,
+      "loss": 0.0407,
+      "step": 200
+    },
+    {
+      "epoch": 1.0769230769230769,
+      "grad_norm": 6.999018669128418,
+      "learning_rate": 4.465811965811966e-06,
+      "loss": 0.266,
+      "step": 210
+    },
+    {
+      "epoch": 1.1282051282051282,
+      "grad_norm": 364.2523498535156,
+      "learning_rate": 4.6794871794871795e-06,
+      "loss": 0.1352,
+      "step": 220
+    },
+    {
+      "epoch": 1.1794871794871795,
+      "grad_norm": 0.3629947304725647,
+      "learning_rate": 4.8931623931623934e-06,
+      "loss": 0.0926,
+      "step": 230
+    },
+    {
+      "epoch": 1.2307692307692308,
+      "grad_norm": 1.258070945739746,
+      "learning_rate": 5.1068376068376065e-06,
+      "loss": 0.0389,
+      "step": 240
+    },
+    {
+      "epoch": 1.282051282051282,
+      "grad_norm": 0.1260978728532791,
+      "learning_rate": 5.320512820512821e-06,
+      "loss": 0.0372,
+      "step": 250
+    },
+    {
+      "epoch": 1.3333333333333333,
+      "grad_norm": 0.113288514316082,
+      "learning_rate": 5.534188034188035e-06,
+      "loss": 0.0708,
+      "step": 260
+    },
+    {
+      "epoch": 1.3846153846153846,
+      "grad_norm": 0.1554139405488968,
+      "learning_rate": 5.7478632478632475e-06,
+      "loss": 0.1013,
+      "step": 270
+    },
+    {
+      "epoch": 1.435897435897436,
+      "grad_norm": 0.14283470809459686,
+      "learning_rate": 5.961538461538462e-06,
+      "loss": 0.0084,
+      "step": 280
+    },
+    {
+      "epoch": 1.4871794871794872,
+      "grad_norm": 0.12477540969848633,
+      "learning_rate": 6.175213675213676e-06,
+      "loss": 0.006,
+      "step": 290
+    },
+    {
+      "epoch": 1.5384615384615383,
+      "grad_norm": 0.7855970859527588,
+      "learning_rate": 6.3888888888888885e-06,
+      "loss": 0.1667,
+      "step": 300
+    },
+    {
+      "epoch": 1.5897435897435899,
+      "grad_norm": 0.18101628124713898,
+      "learning_rate": 6.602564102564103e-06,
+      "loss": 0.1045,
+      "step": 310
+    },
+    {
+      "epoch": 1.641025641025641,
+      "grad_norm": 0.11964868754148483,
+      "learning_rate": 6.816239316239317e-06,
+      "loss": 0.0071,
+      "step": 320
+    },
+    {
+      "epoch": 1.6923076923076923,
+      "grad_norm": 0.09683432430028915,
+      "learning_rate": 7.02991452991453e-06,
+      "loss": 0.0051,
+      "step": 330
+    },
+    {
+      "epoch": 1.7435897435897436,
+      "grad_norm": 0.16468480229377747,
+      "learning_rate": 7.243589743589744e-06,
+      "loss": 0.0689,
+      "step": 340
+    },
+    {
+      "epoch": 1.7948717948717947,
+      "grad_norm": 0.11897635459899902,
+      "learning_rate": 7.457264957264958e-06,
+      "loss": 0.0718,
+      "step": 350
+    },
+    {
+      "epoch": 1.8461538461538463,
+      "grad_norm": 0.08642622083425522,
+      "learning_rate": 7.670940170940172e-06,
+      "loss": 0.0042,
+      "step": 360
+    },
+    {
+      "epoch": 1.8974358974358974,
+      "grad_norm": 0.09093068540096283,
+      "learning_rate": 7.884615384615384e-06,
+      "loss": 0.0388,
+      "step": 370
+    },
+    {
+      "epoch": 1.9487179487179487,
+      "grad_norm": 0.2035251259803772,
+      "learning_rate": 8.098290598290598e-06,
+      "loss": 0.0596,
+      "step": 380
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.12319870293140411,
+      "learning_rate": 8.311965811965812e-06,
+      "loss": 0.0388,
+      "step": 390
+    },
+    {
+      "epoch": 2.0,
+      "eval_f1": 0.9959514170040485,
+      "eval_fbeta": 0.9974050446451359,
+      "eval_loss": 0.019416945055127144,
+      "eval_precision": 0.9983766233766234,
+      "eval_recall": 0.9935379644588045,
+      "eval_runtime": 19.5687,
+      "eval_samples_per_second": 68.375,
+      "eval_steps_per_second": 8.585,
+      "step": 390
+    },
+    {
+      "epoch": 2.051282051282051,
+      "grad_norm": 0.0893879383802414,
+      "learning_rate": 8.525641025641026e-06,
+      "loss": 0.0043,
+      "step": 400
+    },
+    {
+      "epoch": 2.1025641025641026,
+      "grad_norm": 0.07179196923971176,
+      "learning_rate": 8.73931623931624e-06,
+      "loss": 0.0036,
+      "step": 410
+    },
+    {
+      "epoch": 2.1538461538461537,
+      "grad_norm": 0.06529832631349564,
+      "learning_rate": 8.952991452991454e-06,
+      "loss": 0.0032,
+      "step": 420
+    },
+    {
+      "epoch": 2.2051282051282053,
+      "grad_norm": 0.06070837751030922,
+      "learning_rate": 9.166666666666666e-06,
+      "loss": 0.0151,
+      "step": 430
+    },
+    {
+      "epoch": 2.2564102564102564,
+      "grad_norm": 0.15651676058769226,
+      "learning_rate": 9.38034188034188e-06,
+      "loss": 0.1114,
+      "step": 440
+    },
+    {
+      "epoch": 2.3076923076923075,
+      "grad_norm": 0.07094116508960724,
+      "learning_rate": 9.594017094017094e-06,
+      "loss": 0.0045,
+      "step": 450
+    },
+    {
+      "epoch": 2.358974358974359,
+      "grad_norm": 0.06716040521860123,
+      "learning_rate": 9.807692307692308e-06,
+      "loss": 0.0033,
+      "step": 460
+    },
+    {
+      "epoch": 2.41025641025641,
+      "grad_norm": 0.05951394885778427,
+      "learning_rate": 9.994658119658121e-06,
+      "loss": 0.0029,
+      "step": 470
+    },
+    {
+      "epoch": 2.4615384615384617,
+      "grad_norm": 0.05390568822622299,
+      "learning_rate": 9.941239316239318e-06,
+      "loss": 0.0026,
+      "step": 480
+    },
+    {
+      "epoch": 2.5128205128205128,
+      "grad_norm": 0.05104345828294754,
+      "learning_rate": 9.887820512820514e-06,
+      "loss": 0.0024,
+      "step": 490
+    },
+    {
+      "epoch": 2.564102564102564,
+      "grad_norm": 0.04729696735739708,
+      "learning_rate": 9.83440170940171e-06,
+      "loss": 0.0022,
+      "step": 500
+    },
+    {
+      "epoch": 2.6153846153846154,
+      "grad_norm": 0.04450414329767227,
+      "learning_rate": 9.780982905982906e-06,
+      "loss": 0.002,
+      "step": 510
+    },
+    {
+      "epoch": 2.6666666666666665,
+      "grad_norm": 0.041944343596696854,
+      "learning_rate": 9.727564102564104e-06,
+      "loss": 0.0019,
+      "step": 520
+    },
+    {
+      "epoch": 2.717948717948718,
+      "grad_norm": 0.03941260650753975,
+      "learning_rate": 9.6741452991453e-06,
+      "loss": 0.0018,
+      "step": 530
+    },
+    {
+      "epoch": 2.769230769230769,
+      "grad_norm": 0.037517789751291275,
+      "learning_rate": 9.620726495726497e-06,
+      "loss": 0.0017,
+      "step": 540
+    },
+    {
+      "epoch": 2.8205128205128203,
+      "grad_norm": 0.036115214228630066,
+      "learning_rate": 9.567307692307693e-06,
+      "loss": 0.0016,
+      "step": 550
+    },
+    {
+      "epoch": 2.871794871794872,
+      "grad_norm": 0.0382937453687191,
+      "learning_rate": 9.51388888888889e-06,
+      "loss": 0.0418,
+      "step": 560
+    },
+    {
+      "epoch": 2.9230769230769234,
+      "grad_norm": 0.03789462521672249,
+      "learning_rate": 9.460470085470086e-06,
+      "loss": 0.0016,
+      "step": 570
+    },
+    {
+      "epoch": 2.9743589743589745,
+      "grad_norm": 0.03520243614912033,
+      "learning_rate": 9.407051282051283e-06,
+      "loss": 0.0016,
+      "step": 580
+    },
+    {
+      "epoch": 3.0,
+      "eval_f1": 0.9983844911147012,
+      "eval_fbeta": 0.9983844111147077,
+      "eval_loss": 0.010780692100524902,
+      "eval_precision": 0.9983844911147012,
+      "eval_recall": 0.9983844911147012,
+      "eval_runtime": 19.7043,
+      "eval_samples_per_second": 67.904,
+      "eval_steps_per_second": 8.526,
+      "step": 585
+    },
+    {
+      "epoch": 3.0256410256410255,
+      "grad_norm": 0.034466702491045,
+      "learning_rate": 9.35363247863248e-06,
+      "loss": 0.0015,
+      "step": 590
+    },
+    {
+      "epoch": 3.076923076923077,
+      "grad_norm": 0.032045699656009674,
+      "learning_rate": 9.300213675213676e-06,
+      "loss": 0.0014,
+      "step": 600
+    },
+    {
+      "epoch": 3.128205128205128,
+      "grad_norm": 0.03104749321937561,
+      "learning_rate": 9.246794871794873e-06,
+      "loss": 0.0013,
+      "step": 610
+    },
+    {
+      "epoch": 3.1794871794871793,
+      "grad_norm": 0.029300186783075333,
+      "learning_rate": 9.193376068376069e-06,
+      "loss": 0.0013,
+      "step": 620
+    },
+    {
+      "epoch": 3.230769230769231,
+      "grad_norm": 0.02834387496113777,
+      "learning_rate": 9.139957264957266e-06,
+      "loss": 0.0012,
+      "step": 630
+    },
+    {
+      "epoch": 3.282051282051282,
+      "grad_norm": 0.027324577793478966,
+      "learning_rate": 9.086538461538462e-06,
+      "loss": 0.0012,
+      "step": 640
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 0.026796886697411537,
+      "learning_rate": 9.033119658119659e-06,
+      "loss": 0.0011,
+      "step": 650
+    },
+    {
+      "epoch": 3.3846153846153846,
+      "grad_norm": 0.025395015254616737,
+      "learning_rate": 8.979700854700855e-06,
+      "loss": 0.0011,
+      "step": 660
+    },
+    {
+      "epoch": 3.435897435897436,
+      "grad_norm": 0.024470018222928047,
+      "learning_rate": 8.926282051282053e-06,
+      "loss": 0.001,
+      "step": 670
+    },
+    {
+      "epoch": 3.4871794871794872,
+      "grad_norm": 0.02359500713646412,
+      "learning_rate": 8.872863247863248e-06,
+      "loss": 0.001,
+      "step": 680
+    },
+    {
+      "epoch": 3.5384615384615383,
+      "grad_norm": 0.02294657565653324,
+      "learning_rate": 8.819444444444445e-06,
+      "loss": 0.0009,
+      "step": 690
+    },
+    {
+      "epoch": 3.58974358974359,
+      "grad_norm": 0.02199380099773407,
+      "learning_rate": 8.766025641025641e-06,
+      "loss": 0.0009,
+      "step": 700
+    },
+    {
+      "epoch": 3.641025641025641,
+      "grad_norm": 0.0217047818005085,
+      "learning_rate": 8.712606837606838e-06,
+      "loss": 0.0009,
+      "step": 710
+    },
+    {
+      "epoch": 3.6923076923076925,
+      "grad_norm": 0.020800307393074036,
+      "learning_rate": 8.659188034188036e-06,
+      "loss": 0.0008,
+      "step": 720
+    },
+    {
+      "epoch": 3.7435897435897436,
+      "grad_norm": 0.02008337713778019,
+      "learning_rate": 8.605769230769232e-06,
+      "loss": 0.0008,
+      "step": 730
+    },
+    {
+      "epoch": 3.7948717948717947,
+      "grad_norm": 0.02081192284822464,
+      "learning_rate": 8.552350427350427e-06,
+      "loss": 0.0008,
+      "step": 740
+    },
+    {
+      "epoch": 3.8461538461538463,
+      "grad_norm": 0.01924249343574047,
+      "learning_rate": 8.498931623931624e-06,
+      "loss": 0.0008,
+      "step": 750
+    },
+    {
+      "epoch": 3.8974358974358974,
+      "grad_norm": 0.018428660929203033,
+      "learning_rate": 8.445512820512822e-06,
+      "loss": 0.0007,
+      "step": 760
+    },
+    {
+      "epoch": 3.948717948717949,
+      "grad_norm": 0.01805875450372696,
+      "learning_rate": 8.392094017094018e-06,
+      "loss": 0.0007,
+      "step": 770
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.02028258703649044,
+      "learning_rate": 8.338675213675215e-06,
+      "loss": 0.1822,
+      "step": 780
+    },
+    {
+      "epoch": 4.0,
+      "eval_f1": 0.9926769731489016,
+      "eval_fbeta": 0.9970577813430838,
+      "eval_loss": 0.04822924733161926,
+      "eval_precision": 1.0,
+      "eval_recall": 0.9854604200323102,
+      "eval_runtime": 19.6411,
+      "eval_samples_per_second": 68.122,
+      "eval_steps_per_second": 8.553,
+      "step": 780
+    },
+    {
+      "epoch": 4.051282051282051,
+      "grad_norm": 0.022473065182566643,
+      "learning_rate": 8.285256410256411e-06,
+      "loss": 0.0009,
+      "step": 790
+    },
+    {
+      "epoch": 4.102564102564102,
+      "grad_norm": 0.020844997838139534,
+      "learning_rate": 8.231837606837608e-06,
+      "loss": 0.0008,
+      "step": 800
+    },
+    {
+      "epoch": 4.153846153846154,
+      "grad_norm": 0.03373854234814644,
+      "learning_rate": 8.178418803418804e-06,
+      "loss": 0.0443,
+      "step": 810
+    },
+    {
+      "epoch": 4.205128205128205,
+      "grad_norm": 11.480348587036133,
+      "learning_rate": 8.125000000000001e-06,
+      "loss": 0.1932,
+      "step": 820
+    },
+    {
+      "epoch": 4.256410256410256,
+      "grad_norm": 0.03889571130275726,
+      "learning_rate": 8.071581196581197e-06,
+      "loss": 0.2656,
+      "step": 830
+    },
+    {
+      "epoch": 4.3076923076923075,
+      "grad_norm": 0.08549106866121292,
+      "learning_rate": 8.018162393162394e-06,
+      "loss": 0.078,
+      "step": 840
+    },
+    {
+      "epoch": 4.358974358974359,
+      "grad_norm": 0.028424395248293877,
+      "learning_rate": 7.96474358974359e-06,
+      "loss": 0.043,
+      "step": 850
+    },
+    {
+      "epoch": 4.410256410256411,
+      "grad_norm": 124.80490112304688,
+      "learning_rate": 7.911324786324787e-06,
+      "loss": 0.0298,
+      "step": 860
+    },
+    {
+      "epoch": 4.461538461538462,
+      "grad_norm": 0.049465615302324295,
+      "learning_rate": 7.857905982905984e-06,
+      "loss": 0.0881,
+      "step": 870
+    },
+    {
+      "epoch": 4.512820512820513,
+      "grad_norm": 0.04318946227431297,
+      "learning_rate": 7.80448717948718e-06,
+      "loss": 0.0526,
+      "step": 880
+    },
+    {
+      "epoch": 4.564102564102564,
+      "grad_norm": 0.02768160216510296,
+      "learning_rate": 7.751068376068377e-06,
+      "loss": 0.0012,
+      "step": 890
+    },
+    {
+      "epoch": 4.615384615384615,
+      "grad_norm": 0.02875378355383873,
+      "learning_rate": 7.697649572649573e-06,
+      "loss": 0.0857,
+      "step": 900
+    },
+    {
+      "epoch": 4.666666666666667,
+      "grad_norm": 0.08129971474409103,
+      "learning_rate": 7.64423076923077e-06,
+      "loss": 0.0023,
+      "step": 910
+    },
+    {
+      "epoch": 4.717948717948718,
+      "grad_norm": 0.0326925627887249,
+      "learning_rate": 7.590811965811966e-06,
+      "loss": 0.0595,
+      "step": 920
+    },
+    {
+      "epoch": 4.769230769230769,
+      "grad_norm": 0.027897467836737633,
+      "learning_rate": 7.537393162393163e-06,
+      "loss": 0.0415,
+      "step": 930
+    },
+    {
+      "epoch": 4.82051282051282,
+      "grad_norm": 0.0258422140032053,
+      "learning_rate": 7.48397435897436e-06,
+      "loss": 0.0018,
+      "step": 940
+    },
+    {
+      "epoch": 4.871794871794872,
+      "grad_norm": 0.0415211021900177,
+      "learning_rate": 7.4305555555555565e-06,
+      "loss": 0.0832,
+      "step": 950
+    },
+    {
+      "epoch": 4.923076923076923,
+      "grad_norm": 0.024675490334630013,
+      "learning_rate": 7.377136752136753e-06,
+      "loss": 0.001,
+      "step": 960
+    },
+    {
+      "epoch": 4.9743589743589745,
+      "grad_norm": 0.024120161309838295,
+      "learning_rate": 7.323717948717949e-06,
+      "loss": 0.001,
+      "step": 970
+    },
+    {
+      "epoch": 5.0,
+      "eval_f1": 0.99185667752443,
+      "eval_fbeta": 0.9967265967955792,
+      "eval_loss": 0.05167613551020622,
+      "eval_precision": 1.0,
+      "eval_recall": 0.9838449111470113,
+      "eval_runtime": 19.7601,
+      "eval_samples_per_second": 67.712,
+      "eval_steps_per_second": 8.502,
+      "step": 975
+    },
+    {
+      "epoch": 5.0256410256410255,
+      "grad_norm": 0.04074859991669655,
+      "learning_rate": 7.270299145299145e-06,
+      "loss": 0.0441,
+      "step": 980
+    },
+    {
+      "epoch": 5.076923076923077,
+      "grad_norm": 0.024757781997323036,
+      "learning_rate": 7.2168803418803426e-06,
+      "loss": 0.0009,
+      "step": 990
+    },
+    {
+      "epoch": 5.128205128205128,
+      "grad_norm": 0.02358800172805786,
+      "learning_rate": 7.163461538461539e-06,
+      "loss": 0.0388,
+      "step": 1000
+    },
+    {
+      "epoch": 5.17948717948718,
+      "grad_norm": 0.023568585515022278,
+      "learning_rate": 7.110042735042736e-06,
+      "loss": 0.001,
+      "step": 1010
+    },
+    {
+      "epoch": 5.230769230769231,
+      "grad_norm": 0.020907074213027954,
+      "learning_rate": 7.056623931623933e-06,
+      "loss": 0.0009,
+      "step": 1020
+    },
+    {
+      "epoch": 5.282051282051282,
+      "grad_norm": 0.01977492682635784,
+      "learning_rate": 7.003205128205129e-06,
+      "loss": 0.0008,
+      "step": 1030
+    },
+    {
+      "epoch": 5.333333333333333,
+      "grad_norm": 0.019288958981633186,
+      "learning_rate": 6.949786324786325e-06,
+      "loss": 0.0008,
+      "step": 1040
+    },
+    {
+      "epoch": 5.384615384615385,
+      "grad_norm": 0.01873486489057541,
+      "learning_rate": 6.896367521367522e-06,
+      "loss": 0.0008,
+      "step": 1050
+    },
+    {
+      "epoch": 5.435897435897436,
+      "grad_norm": 0.018183663487434387,
+      "learning_rate": 6.842948717948719e-06,
+      "loss": 0.0007,
+      "step": 1060
+    },
+    {
+      "epoch": 5.487179487179487,
+      "grad_norm": 0.01823146641254425,
+      "learning_rate": 6.7895299145299155e-06,
+      "loss": 0.0007,
+      "step": 1070
+    },
+    {
+      "epoch": 5.538461538461538,
+      "grad_norm": 0.017620518803596497,
+      "learning_rate": 6.736111111111112e-06,
+      "loss": 0.0007,
+      "step": 1080
+    },
+    {
+      "epoch": 5.589743589743589,
+      "grad_norm": 0.020309826359152794,
+      "learning_rate": 6.682692307692308e-06,
+      "loss": 0.0455,
+      "step": 1090
+    },
+    {
+      "epoch": 5.641025641025641,
+      "grad_norm": 0.022107329219579697,
+      "learning_rate": 6.629273504273504e-06,
+      "loss": 0.0009,
+      "step": 1100
+    },
+    {
+      "epoch": 5.6923076923076925,
+      "grad_norm": 0.020427662879228592,
+      "learning_rate": 6.5758547008547016e-06,
+      "loss": 0.0008,
+      "step": 1110
+    },
+    {
+      "epoch": 5.743589743589744,
+      "grad_norm": 0.019666729494929314,
+      "learning_rate": 6.522435897435898e-06,
+      "loss": 0.0008,
+      "step": 1120
+    },
+    {
+      "epoch": 5.794871794871795,
+      "grad_norm": 0.01790018193423748,
+      "learning_rate": 6.469017094017095e-06,
+      "loss": 0.0007,
+      "step": 1130
+    },
+    {
+      "epoch": 5.846153846153846,
+      "grad_norm": 0.017409320920705795,
+      "learning_rate": 6.415598290598292e-06,
+      "loss": 0.0007,
+      "step": 1140
+    },
+    {
+      "epoch": 5.897435897435898,
+      "grad_norm": 0.016504665836691856,
+      "learning_rate": 6.362179487179488e-06,
+      "loss": 0.0007,
+      "step": 1150
+    },
+    {
+      "epoch": 5.948717948717949,
+      "grad_norm": 0.01678137667477131,
+      "learning_rate": 6.308760683760684e-06,
+      "loss": 0.0006,
+      "step": 1160
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 0.015802927315235138,
+      "learning_rate": 6.255341880341881e-06,
+      "loss": 0.0006,
+      "step": 1170
+    },
+    {
+      "epoch": 6.0,
+      "eval_f1": 0.9991928974979822,
+      "eval_fbeta": 0.9987091811294363,
+      "eval_loss": 0.005148586817085743,
+      "eval_precision": 0.9983870967741936,
+      "eval_recall": 1.0,
+      "eval_runtime": 19.7873,
+      "eval_samples_per_second": 67.619,
+      "eval_steps_per_second": 8.49,
+      "step": 1170
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 2340,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 12,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 9.13186778936832e+17,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66fb52c99af4f3fabdba40955b0f2597cf91994a897b6b89c60340e35d35072f
+size 5777