Training in progress, step 500, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/config.json +25 -0
last-checkpoint/model.safetensors +3 -0
last-checkpoint/optimizer.pt +3 -0
last-checkpoint/rng_state.pth +3 -0
last-checkpoint/scheduler.pt +3 -0
last-checkpoint/trainer_state.json +405 -0
last-checkpoint/training_args.bin +3 -0

last-checkpoint/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "vocab_size": 30522
+}

last-checkpoint/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a10a2ecaff875b9c46ad2bbd2fed17c2a0a46c72399b0499d9bca795a82b01a
+size 267832560

last-checkpoint/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ffdf6481862c29fc9f519ed97553ae9c619649345ac1473ff2b63f00a952157
+size 535727290

last-checkpoint/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e4a426a8b73c74a38f2a3b1243f7c773cf4681b425c8731e354a12e8672e330
+size 14244

last-checkpoint/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3cbe50d058b46466dc5c0d3a5f85c97b4ca24f57c286062ca922883cd2d25c9c
+size 1064

last-checkpoint/trainer_state.json ADDED Viewed

	@@ -0,0 +1,405 @@

+{
+  "best_metric": 0.0,
+  "best_model_checkpoint": "./results/checkpoint-500",
+  "epoch": 0.17094017094017094,
+  "eval_steps": 500,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.003418803418803419,
+      "grad_norm": 2.3258378505706787,
+      "learning_rate": 4.9943019943019945e-05,
+      "loss": 0.6681,
+      "step": 10
+    },
+    {
+      "epoch": 0.006837606837606838,
+      "grad_norm": 0.7698261737823486,
+      "learning_rate": 4.988603988603989e-05,
+      "loss": 0.6451,
+      "step": 20
+    },
+    {
+      "epoch": 0.010256410256410256,
+      "grad_norm": 1.6664257049560547,
+      "learning_rate": 4.982905982905983e-05,
+      "loss": 0.6485,
+      "step": 30
+    },
+    {
+      "epoch": 0.013675213675213675,
+      "grad_norm": 0.6200563907623291,
+      "learning_rate": 4.9772079772079774e-05,
+      "loss": 0.6453,
+      "step": 40
+    },
+    {
+      "epoch": 0.017094017094017096,
+      "grad_norm": 0.5258885622024536,
+      "learning_rate": 4.971509971509972e-05,
+      "loss": 0.6569,
+      "step": 50
+    },
+    {
+      "epoch": 0.020512820512820513,
+      "grad_norm": 0.5715610384941101,
+      "learning_rate": 4.965811965811966e-05,
+      "loss": 0.6508,
+      "step": 60
+    },
+    {
+      "epoch": 0.023931623931623933,
+      "grad_norm": 0.5744765400886536,
+      "learning_rate": 4.96011396011396e-05,
+      "loss": 0.6029,
+      "step": 70
+    },
+    {
+      "epoch": 0.02735042735042735,
+      "grad_norm": 0.9320403337478638,
+      "learning_rate": 4.9544159544159546e-05,
+      "loss": 0.6644,
+      "step": 80
+    },
+    {
+      "epoch": 0.03076923076923077,
+      "grad_norm": 0.5994309186935425,
+      "learning_rate": 4.948717948717949e-05,
+      "loss": 0.6757,
+      "step": 90
+    },
+    {
+      "epoch": 0.03418803418803419,
+      "grad_norm": 0.4685361385345459,
+      "learning_rate": 4.943019943019943e-05,
+      "loss": 0.6372,
+      "step": 100
+    },
+    {
+      "epoch": 0.037606837606837605,
+      "grad_norm": 0.6897755265235901,
+      "learning_rate": 4.9373219373219375e-05,
+      "loss": 0.6395,
+      "step": 110
+    },
+    {
+      "epoch": 0.041025641025641026,
+      "grad_norm": 0.5714218616485596,
+      "learning_rate": 4.931623931623932e-05,
+      "loss": 0.6323,
+      "step": 120
+    },
+    {
+      "epoch": 0.044444444444444446,
+      "grad_norm": 0.6862583160400391,
+      "learning_rate": 4.925925925925926e-05,
+      "loss": 0.6307,
+      "step": 130
+    },
+    {
+      "epoch": 0.04786324786324787,
+      "grad_norm": 1.1985986232757568,
+      "learning_rate": 4.9202279202279204e-05,
+      "loss": 0.6353,
+      "step": 140
+    },
+    {
+      "epoch": 0.05128205128205128,
+      "grad_norm": 0.4656996428966522,
+      "learning_rate": 4.9145299145299147e-05,
+      "loss": 0.6552,
+      "step": 150
+    },
+    {
+      "epoch": 0.0547008547008547,
+      "grad_norm": 1.3551446199417114,
+      "learning_rate": 4.908831908831909e-05,
+      "loss": 0.6484,
+      "step": 160
+    },
+    {
+      "epoch": 0.05811965811965812,
+      "grad_norm": 1.137487769126892,
+      "learning_rate": 4.903133903133903e-05,
+      "loss": 0.5905,
+      "step": 170
+    },
+    {
+      "epoch": 0.06153846153846154,
+      "grad_norm": 0.6064645051956177,
+      "learning_rate": 4.8974358974358975e-05,
+      "loss": 0.6157,
+      "step": 180
+    },
+    {
+      "epoch": 0.06495726495726496,
+      "grad_norm": 2.0975794792175293,
+      "learning_rate": 4.891737891737892e-05,
+      "loss": 0.6701,
+      "step": 190
+    },
+    {
+      "epoch": 0.06837606837606838,
+      "grad_norm": 0.48940032720565796,
+      "learning_rate": 4.886039886039887e-05,
+      "loss": 0.6342,
+      "step": 200
+    },
+    {
+      "epoch": 0.07179487179487179,
+      "grad_norm": 1.2511190176010132,
+      "learning_rate": 4.8803418803418804e-05,
+      "loss": 0.6521,
+      "step": 210
+    },
+    {
+      "epoch": 0.07521367521367521,
+      "grad_norm": 0.7074885964393616,
+      "learning_rate": 4.874643874643875e-05,
+      "loss": 0.6548,
+      "step": 220
+    },
+    {
+      "epoch": 0.07863247863247863,
+      "grad_norm": 1.152065396308899,
+      "learning_rate": 4.868945868945869e-05,
+      "loss": 0.6589,
+      "step": 230
+    },
+    {
+      "epoch": 0.08205128205128205,
+      "grad_norm": 0.39897221326828003,
+      "learning_rate": 4.863247863247863e-05,
+      "loss": 0.6595,
+      "step": 240
+    },
+    {
+      "epoch": 0.08547008547008547,
+      "grad_norm": 0.5259735584259033,
+      "learning_rate": 4.8575498575498576e-05,
+      "loss": 0.665,
+      "step": 250
+    },
+    {
+      "epoch": 0.08888888888888889,
+      "grad_norm": 0.5097119808197021,
+      "learning_rate": 4.851851851851852e-05,
+      "loss": 0.6498,
+      "step": 260
+    },
+    {
+      "epoch": 0.09230769230769231,
+      "grad_norm": 0.48037877678871155,
+      "learning_rate": 4.846153846153846e-05,
+      "loss": 0.5882,
+      "step": 270
+    },
+    {
+      "epoch": 0.09572649572649573,
+      "grad_norm": 0.6850088834762573,
+      "learning_rate": 4.840455840455841e-05,
+      "loss": 0.6329,
+      "step": 280
+    },
+    {
+      "epoch": 0.09914529914529914,
+      "grad_norm": 0.6092679500579834,
+      "learning_rate": 4.834757834757835e-05,
+      "loss": 0.6246,
+      "step": 290
+    },
+    {
+      "epoch": 0.10256410256410256,
+      "grad_norm": 1.0922237634658813,
+      "learning_rate": 4.829059829059829e-05,
+      "loss": 0.6144,
+      "step": 300
+    },
+    {
+      "epoch": 0.10598290598290598,
+      "grad_norm": 1.4150214195251465,
+      "learning_rate": 4.823361823361824e-05,
+      "loss": 0.643,
+      "step": 310
+    },
+    {
+      "epoch": 0.1094017094017094,
+      "grad_norm": 1.516169548034668,
+      "learning_rate": 4.817663817663818e-05,
+      "loss": 0.6046,
+      "step": 320
+    },
+    {
+      "epoch": 0.11282051282051282,
+      "grad_norm": 0.5234593749046326,
+      "learning_rate": 4.8119658119658126e-05,
+      "loss": 0.6193,
+      "step": 330
+    },
+    {
+      "epoch": 0.11623931623931624,
+      "grad_norm": 0.6485182046890259,
+      "learning_rate": 4.806267806267806e-05,
+      "loss": 0.6314,
+      "step": 340
+    },
+    {
+      "epoch": 0.11965811965811966,
+      "grad_norm": 0.9457536935806274,
+      "learning_rate": 4.8005698005698006e-05,
+      "loss": 0.5802,
+      "step": 350
+    },
+    {
+      "epoch": 0.12307692307692308,
+      "grad_norm": 1.2444144487380981,
+      "learning_rate": 4.7948717948717955e-05,
+      "loss": 0.5927,
+      "step": 360
+    },
+    {
+      "epoch": 0.1264957264957265,
+      "grad_norm": 0.499647855758667,
+      "learning_rate": 4.789173789173789e-05,
+      "loss": 0.6358,
+      "step": 370
+    },
+    {
+      "epoch": 0.12991452991452992,
+      "grad_norm": 2.130183696746826,
+      "learning_rate": 4.7834757834757834e-05,
+      "loss": 0.6324,
+      "step": 380
+    },
+    {
+      "epoch": 0.13333333333333333,
+      "grad_norm": 0.6378350257873535,
+      "learning_rate": 4.7777777777777784e-05,
+      "loss": 0.6061,
+      "step": 390
+    },
+    {
+      "epoch": 0.13675213675213677,
+      "grad_norm": 0.39135029911994934,
+      "learning_rate": 4.772079772079772e-05,
+      "loss": 0.6329,
+      "step": 400
+    },
+    {
+      "epoch": 0.14017094017094017,
+      "grad_norm": 0.5480381846427917,
+      "learning_rate": 4.766381766381767e-05,
+      "loss": 0.6607,
+      "step": 410
+    },
+    {
+      "epoch": 0.14358974358974358,
+      "grad_norm": 0.4431852400302887,
+      "learning_rate": 4.7606837606837606e-05,
+      "loss": 0.6233,
+      "step": 420
+    },
+    {
+      "epoch": 0.147008547008547,
+      "grad_norm": 0.4828330874443054,
+      "learning_rate": 4.754985754985755e-05,
+      "loss": 0.6437,
+      "step": 430
+    },
+    {
+      "epoch": 0.15042735042735042,
+      "grad_norm": 0.5272857546806335,
+      "learning_rate": 4.74928774928775e-05,
+      "loss": 0.6671,
+      "step": 440
+    },
+    {
+      "epoch": 0.15384615384615385,
+      "grad_norm": 1.4251387119293213,
+      "learning_rate": 4.7435897435897435e-05,
+      "loss": 0.658,
+      "step": 450
+    },
+    {
+      "epoch": 0.15726495726495726,
+      "grad_norm": 0.8041712641716003,
+      "learning_rate": 4.737891737891738e-05,
+      "loss": 0.6487,
+      "step": 460
+    },
+    {
+      "epoch": 0.1606837606837607,
+      "grad_norm": 0.7019796371459961,
+      "learning_rate": 4.732193732193733e-05,
+      "loss": 0.6019,
+      "step": 470
+    },
+    {
+      "epoch": 0.1641025641025641,
+      "grad_norm": 0.8561422228813171,
+      "learning_rate": 4.7264957264957264e-05,
+      "loss": 0.6897,
+      "step": 480
+    },
+    {
+      "epoch": 0.1675213675213675,
+      "grad_norm": 1.0677204132080078,
+      "learning_rate": 4.7207977207977214e-05,
+      "loss": 0.6848,
+      "step": 490
+    },
+    {
+      "epoch": 0.17094017094017094,
+      "grad_norm": 0.4762294590473175,
+      "learning_rate": 4.7150997150997157e-05,
+      "loss": 0.6527,
+      "step": 500
+    },
+    {
+      "epoch": 0.17094017094017094,
+      "eval_accuracy": 0.661082143772972,
+      "eval_f1": 0.0,
+      "eval_loss": 0.6433083415031433,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_roc_auc": 0.4981741909669265,
+      "eval_runtime": 36.622,
+      "eval_samples_per_second": 319.453,
+      "eval_steps_per_second": 19.988,
+      "step": 500
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 8775,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.001
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 264934797312000.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

last-checkpoint/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2be5ea5a8355405fdb4a1fa2f56c3eec77a53269f31e4155f897571249d4091b
+size 5368