Upload 7 files

Browse files

Files from the last checkpoint

Files changed (7) hide show

config.json +25 -0
model.safetensors +3 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
trainer_state.json +175 -0
training_args.bin +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae1982680ae9f9e1f8426f5519bf59cd68f2b0cd1bcf8b54ea5a3b1ee61c83c2
+size 267832560

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:340b9482dbdf0381d705a91ebd8683c8dff0fc4541963a96c4e4c5874352fef5
+size 535727290

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c501df58e6d2767dd7745cc81b361104778cedf852ddd90d3881310b0d2c1a02
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:020741e9c96faa781ae50007c960e18c228efeb68ef0c01c57db9440c484d6ef
+size 1064

trainer_state.json ADDED Viewed

	@@ -0,0 +1,175 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 7.0,
+  "eval_steps": 500,
+  "global_step": 13755,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.8857142857142856e-05,
+      "loss": 0.5664,
+      "step": 1965
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7443843461660834,
+      "eval_f1": 0.4215982721382289,
+      "eval_false_negatives": 3699,
+      "eval_false_positives": 318,
+      "eval_loss": 0.5374670624732971,
+      "eval_precision": 0.8215488215488216,
+      "eval_recall": 0.28355607205113303,
+      "eval_runtime": 750.1293,
+      "eval_samples_per_second": 20.95,
+      "eval_steps_per_second": 0.656,
+      "eval_true_negatives": 10234,
+      "eval_true_positives": 1464,
+      "step": 1965
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 1.5714285714285715e-05,
+      "loss": 0.4873,
+      "step": 3930
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7466751511294941,
+      "eval_f1": 0.5101513473606497,
+      "eval_false_negatives": 3090,
+      "eval_false_positives": 891,
+      "eval_loss": 0.5381917357444763,
+      "eval_precision": 0.6993927125506073,
+      "eval_recall": 0.40151074956420685,
+      "eval_runtime": 749.6591,
+      "eval_samples_per_second": 20.963,
+      "eval_steps_per_second": 0.656,
+      "eval_true_negatives": 9661,
+      "eval_true_positives": 2073,
+      "step": 3930
+    },
+    {
+      "epoch": 3.0,
+      "learning_rate": 1.257142857142857e-05,
+      "loss": 0.3802,
+      "step": 5895
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7215399300031817,
+      "eval_f1": 0.5454923140839218,
+      "eval_false_negatives": 2537,
+      "eval_false_positives": 1839,
+      "eval_loss": 0.6351324319839478,
+      "eval_precision": 0.5881298992161255,
+      "eval_recall": 0.5086190199496416,
+      "eval_runtime": 749.7171,
+      "eval_samples_per_second": 20.961,
+      "eval_steps_per_second": 0.656,
+      "eval_true_negatives": 8713,
+      "eval_true_positives": 2626,
+      "step": 5895
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 9.428571428571428e-06,
+      "loss": 0.2694,
+      "step": 7860
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.725739739102768,
+      "eval_f1": 0.5421712343318462,
+      "eval_false_negatives": 2611,
+      "eval_false_positives": 1699,
+      "eval_loss": 0.761067271232605,
+      "eval_precision": 0.6003293342742884,
+      "eval_recall": 0.49428626767383305,
+      "eval_runtime": 750.8442,
+      "eval_samples_per_second": 20.93,
+      "eval_steps_per_second": 0.655,
+      "eval_true_negatives": 8853,
+      "eval_true_positives": 2552,
+      "step": 7860
+    },
+    {
+      "epoch": 5.0,
+      "learning_rate": 6.285714285714285e-06,
+      "loss": 0.1848,
+      "step": 9825
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.7166401527203309,
+      "eval_f1": 0.5417309869301224,
+      "eval_false_negatives": 2531,
+      "eval_false_positives": 1922,
+      "eval_loss": 0.9724581241607666,
+      "eval_precision": 0.577953447518665,
+      "eval_recall": 0.5097811349990315,
+      "eval_runtime": 749.9897,
+      "eval_samples_per_second": 20.954,
+      "eval_steps_per_second": 0.656,
+      "eval_true_negatives": 8630,
+      "eval_true_positives": 2632,
+      "step": 9825
+    },
+    {
+      "epoch": 6.0,
+      "learning_rate": 3.1428571428571425e-06,
+      "loss": 0.1305,
+      "step": 11790
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.7165128857779192,
+      "eval_f1": 0.5395348837209303,
+      "eval_false_negatives": 2553,
+      "eval_false_positives": 1902,
+      "eval_loss": 1.1839183568954468,
+      "eval_precision": 0.5784574468085106,
+      "eval_recall": 0.505520046484602,
+      "eval_runtime": 816.6636,
+      "eval_samples_per_second": 19.243,
+      "eval_steps_per_second": 0.602,
+      "eval_true_negatives": 8650,
+      "eval_true_positives": 2610,
+      "step": 11790
+    },
+    {
+      "epoch": 7.0,
+      "learning_rate": 0.0,
+      "loss": 0.0948,
+      "step": 13755
+    },
+    {
+      "epoch": 7.0,
+      "eval_accuracy": 0.7077951002227172,
+      "eval_f1": 0.5435387673956262,
+      "eval_false_negatives": 2429,
+      "eval_false_positives": 2163,
+      "eval_loss": 1.3593369722366333,
+      "eval_precision": 0.55830100061262,
+      "eval_recall": 0.5295370908386597,
+      "eval_runtime": 766.2151,
+      "eval_samples_per_second": 20.51,
+      "eval_steps_per_second": 0.642,
+      "eval_true_negatives": 8389,
+      "eval_true_positives": 2734,
+      "step": 13755
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 13755,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 7,
+  "save_steps": 500,
+  "total_flos": 5.828645021303194e+16,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3fc2e54d19621e649aff8fa871a8ce1d2b85253752db0b0696832d2a9a732455
+size 4600