JeukHwang committed on Nov 13, 2023

Commit

aeb77bc

1 Parent(s): 3eb9199

Training in progress, epoch 1

Browse files

Files changed (37) hide show

logs/events.out.tfevents.1699905769.b18f42e49ba4.3021.4 +2 -2
logs/events.out.tfevents.1699906034.b18f42e49ba4.3021.5 +3 -0
model.safetensors +1 -1
run-3/checkpoint-1581/config.json +34 -0
run-3/checkpoint-1581/model.safetensors +3 -0
run-3/checkpoint-1581/optimizer.pt +3 -0
run-3/checkpoint-1581/rng_state.pth +3 -0
run-3/checkpoint-1581/scheduler.pt +3 -0
run-3/checkpoint-1581/special_tokens_map.json +7 -0
run-3/checkpoint-1581/tokenizer.json +0 -0
run-3/checkpoint-1581/tokenizer_config.json +57 -0
run-3/checkpoint-1581/trainer_state.json +69 -0
run-3/checkpoint-1581/training_args.bin +3 -0
run-3/checkpoint-1581/vocab.txt +0 -0
run-3/checkpoint-2108/config.json +34 -0
run-3/checkpoint-2108/model.safetensors +3 -0
run-3/checkpoint-2108/optimizer.pt +3 -0
run-3/checkpoint-2108/rng_state.pth +3 -0
run-3/checkpoint-2108/scheduler.pt +3 -0
run-3/checkpoint-2108/special_tokens_map.json +7 -0
run-3/checkpoint-2108/tokenizer.json +0 -0
run-3/checkpoint-2108/tokenizer_config.json +57 -0
run-3/checkpoint-2108/trainer_state.json +84 -0
run-3/checkpoint-2108/training_args.bin +3 -0
run-3/checkpoint-2108/vocab.txt +0 -0
run-4/checkpoint-527/config.json +34 -0
run-4/checkpoint-527/model.safetensors +3 -0
run-4/checkpoint-527/optimizer.pt +3 -0
run-4/checkpoint-527/rng_state.pth +3 -0
run-4/checkpoint-527/scheduler.pt +3 -0
run-4/checkpoint-527/special_tokens_map.json +7 -0
run-4/checkpoint-527/tokenizer.json +0 -0
run-4/checkpoint-527/tokenizer_config.json +57 -0
run-4/checkpoint-527/trainer_state.json +39 -0
run-4/checkpoint-527/training_args.bin +3 -0
run-4/checkpoint-527/vocab.txt +0 -0
training_args.bin +1 -1

logs/events.out.tfevents.1699905769.b18f42e49ba4.3021.4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c7eefbda1a162b7754570549490f5488e0da5721a8264686247a44f4080d996
-size 5253

 version https://git-lfs.github.com/spec/v1
+oid sha256:6154b3a2f62401a136a0dabe39d792967a72aac5b612e1fd8827e3540c35b96c
+size 6567

logs/events.out.tfevents.1699906034.b18f42e49ba4.3021.5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d4525b47b7a03251941bc426667dd62b395702c8f014d7a7f54e63b03fd367bf
+size 4773

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89069819ae8892d598e4918e8919337dbbfc0b94c6dbbc93e97805a5681d4bfa
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea2e623d90f0c42e0b67908b0a3c2828497a0900dde6a0177db66cce349ecec8
 size 17549312

run-3/checkpoint-1581/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.35.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-3/checkpoint-1581/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6fac2d67b4d24774974f3e9b4f3f3c27844c0edc399e1a2f01e32fe595f25eba
+size 17549312

run-3/checkpoint-1581/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a449054a5439f501cf5a09598f2233481d778f162e28825f703c3e9d9b20f5b9
+size 35123898

run-3/checkpoint-1581/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0145a794c4550fd779295bb332661a545e0c28f08ca5060bf05d2337c4c7b4d2
+size 14308

run-3/checkpoint-1581/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:36ec7c5d476370f0f90e5d17547a0a3cca2ee33759320b79e3ad8277e2c8dd2b
+size 1064

run-3/checkpoint-1581/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-1581/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-1581/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-1581/trainer_state.json ADDED Viewed

	@@ -0,0 +1,69 @@

+{
+  "best_metric": 0.8027522935779816,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-1581",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1581,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.598599288119205e-05,
+      "loss": 3.37,
+      "step": 527
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7844036697247706,
+      "eval_loss": 2.4027223587036133,
+      "eval_runtime": 0.8209,
+      "eval_samples_per_second": 1062.259,
+      "eval_steps_per_second": 8.527,
+      "step": 527
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.4020991455460686e-05,
+      "loss": 1.8949,
+      "step": 1054
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7970183486238532,
+      "eval_loss": 2.0549635887145996,
+      "eval_runtime": 0.8227,
+      "eval_samples_per_second": 1059.877,
+      "eval_steps_per_second": 8.508,
+      "step": 1054
+    },
+    {
+      "epoch": 3.0,
+      "learning_rate": 1.2033242878729832e-05,
+      "loss": 1.5153,
+      "step": 1581
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.8027522935779816,
+      "eval_loss": 1.952178716659546,
+      "eval_runtime": 0.8301,
+      "eval_samples_per_second": 1050.447,
+      "eval_steps_per_second": 8.432,
+      "step": 1581
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2108,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 24280106603460.0,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.03192688286790302,
+    "learning_rate": 4.795099430692341e-05,
+    "num_train_epochs": 4,
+    "temperature": 30
+  }
+}

run-3/checkpoint-1581/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ecc175a79b2251c6cb04bf44f82ce91061574615bd4fc831925b19ea706ccac6
+size 4600

run-3/checkpoint-1581/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-2108/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.35.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-3/checkpoint-2108/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb35aeeee338c2a31fd9f963cfcd46ece7bb6d5144591ed96fde56cd7b764a6c
+size 17549312

run-3/checkpoint-2108/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d019940311ccd0fa5449f345d2534fe5210f9fa62587883da38f8f50af051f03
+size 35123898

run-3/checkpoint-2108/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5380d7490ac96d7f61709bde5d4086c1114991ed50d41459765b29873c758add
+size 14308

run-3/checkpoint-2108/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:579010a61264109b821ac039430843347659f4ae467afe7ae4a21485662fcecb
+size 1064

run-3/checkpoint-2108/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-2108/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-2108/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-2108/trainer_state.json ADDED Viewed

	@@ -0,0 +1,84 @@

+{
+  "best_metric": 0.8096330275229358,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-2108",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 2108,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.598599288119205e-05,
+      "loss": 3.37,
+      "step": 527
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7844036697247706,
+      "eval_loss": 2.4027223587036133,
+      "eval_runtime": 0.8209,
+      "eval_samples_per_second": 1062.259,
+      "eval_steps_per_second": 8.527,
+      "step": 527
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.4020991455460686e-05,
+      "loss": 1.8949,
+      "step": 1054
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7970183486238532,
+      "eval_loss": 2.0549635887145996,
+      "eval_runtime": 0.8227,
+      "eval_samples_per_second": 1059.877,
+      "eval_steps_per_second": 8.508,
+      "step": 1054
+    },
+    {
+      "epoch": 3.0,
+      "learning_rate": 1.2033242878729832e-05,
+      "loss": 1.5153,
+      "step": 1581
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.8027522935779816,
+      "eval_loss": 1.952178716659546,
+      "eval_runtime": 0.8301,
+      "eval_samples_per_second": 1050.447,
+      "eval_steps_per_second": 8.432,
+      "step": 1581
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 4.549430199897857e-08,
+      "loss": 1.3787,
+      "step": 2108
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.8096330275229358,
+      "eval_loss": 1.9310358762741089,
+      "eval_runtime": 0.8352,
+      "eval_samples_per_second": 1044.036,
+      "eval_steps_per_second": 8.381,
+      "step": 2108
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2108,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 32320254802260.0,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.03192688286790302,
+    "learning_rate": 4.795099430692341e-05,
+    "num_train_epochs": 4,
+    "temperature": 30
+  }
+}

run-3/checkpoint-2108/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ecc175a79b2251c6cb04bf44f82ce91061574615bd4fc831925b19ea706ccac6
+size 4600

run-3/checkpoint-2108/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-527/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.35.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-4/checkpoint-527/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea2e623d90f0c42e0b67908b0a3c2828497a0900dde6a0177db66cce349ecec8
+size 17549312

run-4/checkpoint-527/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7dd3a55f5b381b7ed112307f9bf4fedb615d5af9ae39b0bdd64692a84ddc4916
+size 35123898

run-4/checkpoint-527/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ababcbf30496eb4976cce44a5faf0cf605467f452ba655745eb60d884a0042d8
+size 14308

run-4/checkpoint-527/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:675228c8b7a1b3e5847b15f71c627ba6d9b80dfff6229ccd4e3c7c6f5094cef2
+size 1064

run-4/checkpoint-527/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-527/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-527/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-527/trainer_state.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "best_metric": 0.8142201834862385,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-4/checkpoint-527",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 527,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 0.00040021870017567724,
+      "loss": 1.2163,
+      "step": 527
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.8142201834862385,
+      "eval_loss": 1.4123148918151855,
+      "eval_runtime": 0.9262,
+      "eval_samples_per_second": 941.52,
+      "eval_steps_per_second": 7.558,
+      "step": 527
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 3162,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "total_flos": 8117476690260.0,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.3872215527509325,
+    "learning_rate": 0.0004802624402108127,
+    "num_train_epochs": 6,
+    "temperature": 26
+  }
+}

run-4/checkpoint-527/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e8b08743d46499d2923be786e050cfb5462874efdc395fd5a374a78ef5a78d32
+size 4600

run-4/checkpoint-527/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ecc175a79b2251c6cb04bf44f82ce91061574615bd4fc831925b19ea706ccac6
 size 4600

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8b08743d46499d2923be786e050cfb5462874efdc395fd5a374a78ef5a78d32
 size 4600