AilingDAI commited on Mar 23, 2024

Commit

27d3d6f

verified ·

1 Parent(s): 70aa73b

Training in progress, epoch 1

Browse files

Files changed (37) hide show

model.safetensors +1 -1
run-3/checkpoint-536/config.json +25 -0
run-3/checkpoint-536/model.safetensors +3 -0
run-3/checkpoint-536/optimizer.pt +3 -0
run-3/checkpoint-536/rng_state.pth +3 -0
run-3/checkpoint-536/scheduler.pt +3 -0
run-3/checkpoint-536/special_tokens_map.json +7 -0
run-3/checkpoint-536/tokenizer.json +0 -0
run-3/checkpoint-536/tokenizer_config.json +55 -0
run-3/checkpoint-536/trainer_state.json +51 -0
run-3/checkpoint-536/training_args.bin +3 -0
run-3/checkpoint-536/vocab.txt +0 -0
run-3/checkpoint-804/config.json +25 -0
run-3/checkpoint-804/model.safetensors +3 -0
run-3/checkpoint-804/optimizer.pt +3 -0
run-3/checkpoint-804/rng_state.pth +3 -0
run-3/checkpoint-804/scheduler.pt +3 -0
run-3/checkpoint-804/special_tokens_map.json +7 -0
run-3/checkpoint-804/tokenizer.json +0 -0
run-3/checkpoint-804/tokenizer_config.json +55 -0
run-3/checkpoint-804/trainer_state.json +60 -0
run-3/checkpoint-804/training_args.bin +3 -0
run-3/checkpoint-804/vocab.txt +0 -0
run-4/checkpoint-134/config.json +25 -0
run-4/checkpoint-134/model.safetensors +3 -0
run-4/checkpoint-134/optimizer.pt +3 -0
run-4/checkpoint-134/rng_state.pth +3 -0
run-4/checkpoint-134/scheduler.pt +3 -0
run-4/checkpoint-134/special_tokens_map.json +7 -0
run-4/checkpoint-134/tokenizer.json +0 -0
run-4/checkpoint-134/tokenizer_config.json +55 -0
run-4/checkpoint-134/trainer_state.json +35 -0
run-4/checkpoint-134/training_args.bin +3 -0
run-4/checkpoint-134/vocab.txt +0 -0
runs/Mar23_02-10-25_52e0cb6c1736/events.out.tfevents.1711160674.52e0cb6c1736.459.5 +2 -2
runs/Mar23_02-10-25_52e0cb6c1736/events.out.tfevents.1711160778.52e0cb6c1736.459.6 +3 -0
training_args.bin +1 -1

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:726756e6aaf8542a88d22eac44451c6a7c584ddeb956346d43e9bf36b2e8c311
 size 267832560

 version https://git-lfs.github.com/spec/v1
+oid sha256:9113d76ba545fb086fb6a5e9e3fc213529275493d8432e21f9e80dab7fab4bca
 size 267832560

run-3/checkpoint-536/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.1",
+  "vocab_size": 30522
+}

run-3/checkpoint-536/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ec86e47b9208e0271d72aac635c2553d0a69623bcae3c419cc9d9e070530ec7
+size 267832560

run-3/checkpoint-536/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:986f7d97757f9b537d44386b83f77f9fe3b8e1ae218c66f72c18bcd57f5186ea
+size 535727290

run-3/checkpoint-536/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:177c3e13ba89dd3c772846b45ea81c44af694521955ca0ef000c19bacf8bc619
+size 14244

run-3/checkpoint-536/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f27f2cfb1a916fc142d060affb292f12f715f22cc95760fab1ec78531b28e56
+size 1064

run-3/checkpoint-536/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-536/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-536/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-536/trainer_state.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "best_metric": 0.3636302567913777,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-3/checkpoint-536",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 536,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5497495532035828,
+      "eval_matthews_correlation": 0.0928457264044978,
+      "eval_runtime": 0.7621,
+      "eval_samples_per_second": 1368.64,
+      "eval_steps_per_second": 86.606,
+      "step": 268
+    },
+    {
+      "epoch": 1.87,
+      "grad_norm": 3.906301498413086,
+      "learning_rate": 1.5090673992837595e-06,
+      "loss": 0.5538,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5196466445922852,
+      "eval_matthews_correlation": 0.3636302567913777,
+      "eval_runtime": 1.0628,
+      "eval_samples_per_second": 981.371,
+      "eval_steps_per_second": 62.1,
+      "step": 536
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 804,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 96158136429468.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 3.9910861481057325e-06,
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 32,
+    "seed": 19
+  }
+}

run-3/checkpoint-536/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd40e13ada81b097b394cc550acc79b74483a245c86a38ab12015892bd5ce555
+size 4984

run-3/checkpoint-536/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-804/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.1",
+  "vocab_size": 30522
+}

run-3/checkpoint-804/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a97a41001ef6e895b020a2a12ec2f2cefd7fe8aedb18e81e17f08dccf20c3eb7
+size 267832560

run-3/checkpoint-804/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ad581c724102953af4dcdfd78cec5fb6556db19c0a8a90d2ddd8560efc80e64
+size 535727290

run-3/checkpoint-804/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:043d33b191b85d29becc8fc90cd248cf23846723a95a87c12bd765c7348a4904
+size 14244

run-3/checkpoint-804/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e608cf97231afcef17a07f83244ca9a201a8612a018b106874405cb95a982767
+size 1064

run-3/checkpoint-804/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-804/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-804/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-804/trainer_state.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+  "best_metric": 0.4033324209850535,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-3/checkpoint-804",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 804,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5497495532035828,
+      "eval_matthews_correlation": 0.0928457264044978,
+      "eval_runtime": 0.7621,
+      "eval_samples_per_second": 1368.64,
+      "eval_steps_per_second": 86.606,
+      "step": 268
+    },
+    {
+      "epoch": 1.87,
+      "grad_norm": 3.906301498413086,
+      "learning_rate": 1.5090673992837595e-06,
+      "loss": 0.5538,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5196466445922852,
+      "eval_matthews_correlation": 0.3636302567913777,
+      "eval_runtime": 1.0628,
+      "eval_samples_per_second": 981.371,
+      "eval_steps_per_second": 62.1,
+      "step": 536
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.5164278745651245,
+      "eval_matthews_correlation": 0.4033324209850535,
+      "eval_runtime": 0.8583,
+      "eval_samples_per_second": 1215.213,
+      "eval_steps_per_second": 76.897,
+      "step": 804
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 804,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 96158136429468.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 3.9910861481057325e-06,
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 32,
+    "seed": 19
+  }
+}

run-3/checkpoint-804/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd40e13ada81b097b394cc550acc79b74483a245c86a38ab12015892bd5ce555
+size 4984

run-3/checkpoint-804/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-134/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.1",
+  "vocab_size": 30522
+}

run-4/checkpoint-134/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9113d76ba545fb086fb6a5e9e3fc213529275493d8432e21f9e80dab7fab4bca
+size 267832560

run-4/checkpoint-134/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b0106ff33b59455777abc2985d6ec13b640c6c055e4b7864d7fd5bbc451f140
+size 535727290

run-4/checkpoint-134/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a208b40e2f0aa886fa15a4a55da06d4448874802d6825c61697aad10fba501f
+size 14244

run-4/checkpoint-134/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:25e9ff4ba45e6cdf15ab862ba9ffc5d5aba0cbfe1a4fe2f92274174a2fa1c312
+size 1064

run-4/checkpoint-134/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-134/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-134/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-134/trainer_state.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "best_metric": 0.46741180953359257,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-4/checkpoint-134",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 134,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.46314120292663574,
+      "eval_matthews_correlation": 0.46741180953359257,
+      "eval_runtime": 0.803,
+      "eval_samples_per_second": 1298.905,
+      "eval_steps_per_second": 82.193,
+      "step": 134
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 536,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 3.0100334336501613e-05,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 64,
+    "seed": 15
+  }
+}

run-4/checkpoint-134/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:151c174a2ffa585401c97ac1e4a204237f812a0791c32e61c4a49909b4de743c
+size 4984

run-4/checkpoint-134/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

runs/Mar23_02-10-25_52e0cb6c1736/events.out.tfevents.1711160674.52e0cb6c1736.459.5 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98dc32f1d21d64e305220d1f19bbf48e78925b3924e07befcaa6dc93ec8555f5
-size 5468

 version https://git-lfs.github.com/spec/v1
+oid sha256:221b7f8b1ab52ae3502aade6819ee253fba3e44c2dd768925bfb9e3cd02ce721
+size 6157

runs/Mar23_02-10-25_52e0cb6c1736/events.out.tfevents.1711160778.52e0cb6c1736.459.6 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1bfa42faf095e3a82012afdd6ffa95f28ab919fa083b87ca26775225fc933e1
+size 6492

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd40e13ada81b097b394cc550acc79b74483a245c86a38ab12015892bd5ce555
 size 4984

 version https://git-lfs.github.com/spec/v1
+oid sha256:151c174a2ffa585401c97ac1e4a204237f812a0791c32e61c4a49909b4de743c
 size 4984