lilyray commited on
Commit
d09d451
·
verified ·
1 Parent(s): 98d1e8f

Training in progress, epoch 2

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72271a98ada970df47cd57bfbd4c33eb65e05fb14cb2ea7bf120b4542b53b217
3
  size 438014016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f47067ac901969774100d78a0a54299a73ae4642b1348ad514ca9ba59a14a426
3
  size 438014016
run-0/checkpoint-716/config.json CHANGED
@@ -1,25 +1,71 @@
1
  {
2
- "_name_or_path": "distilbert-base-uncased",
3
- "activation": "gelu",
4
  "architectures": [
5
- "DistilBertForSequenceClassification"
6
  ],
7
- "attention_dropout": 0.1,
8
- "dim": 768,
9
- "dropout": 0.1,
10
- "hidden_dim": 3072,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  "initializer_range": 0.02,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  "max_position_embeddings": 512,
13
- "model_type": "distilbert",
14
- "n_heads": 12,
15
- "n_layers": 6,
16
  "pad_token_id": 0,
 
17
  "problem_type": "single_label_classification",
18
- "qa_dropout": 0.1,
19
- "seq_classif_dropout": 0.2,
20
- "sinusoidal_pos_embds": false,
21
- "tie_weights_": true,
22
  "torch_dtype": "float32",
23
  "transformers_version": "4.38.2",
 
 
24
  "vocab_size": 30522
25
  }
 
1
  {
2
+ "_name_or_path": "google-bert/bert-base-uncased",
 
3
  "architectures": [
4
+ "BertForSequenceClassification"
5
  ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19"
33
+ },
34
  "initializer_range": 0.02,
35
+ "intermediate_size": 3072,
36
+ "label2id": {
37
+ "LABEL_0": 0,
38
+ "LABEL_1": 1,
39
+ "LABEL_10": 10,
40
+ "LABEL_11": 11,
41
+ "LABEL_12": 12,
42
+ "LABEL_13": 13,
43
+ "LABEL_14": 14,
44
+ "LABEL_15": 15,
45
+ "LABEL_16": 16,
46
+ "LABEL_17": 17,
47
+ "LABEL_18": 18,
48
+ "LABEL_19": 19,
49
+ "LABEL_2": 2,
50
+ "LABEL_3": 3,
51
+ "LABEL_4": 4,
52
+ "LABEL_5": 5,
53
+ "LABEL_6": 6,
54
+ "LABEL_7": 7,
55
+ "LABEL_8": 8,
56
+ "LABEL_9": 9
57
+ },
58
+ "layer_norm_eps": 1e-12,
59
  "max_position_embeddings": 512,
60
+ "model_type": "bert",
61
+ "num_attention_heads": 12,
62
+ "num_hidden_layers": 12,
63
  "pad_token_id": 0,
64
+ "position_embedding_type": "absolute",
65
  "problem_type": "single_label_classification",
 
 
 
 
66
  "torch_dtype": "float32",
67
  "transformers_version": "4.38.2",
68
+ "type_vocab_size": 2,
69
+ "use_cache": true,
70
  "vocab_size": 30522
71
  }
run-0/checkpoint-716/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f1653fe18ddcac289f7e57a18461e7deffd5befb44685ee397f45d7a6ebc77c
3
- size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f47067ac901969774100d78a0a54299a73ae4642b1348ad514ca9ba59a14a426
3
+ size 438014016
run-0/checkpoint-716/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:716a387eef1ebd0c65de8baed18dd0d7a67502b118c4e14dbbc893dec22f92ab
3
- size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f6a5ab7cb54869b7221000da5222027eff73b78cb665229fac9cf7ce62839ad
3
+ size 876149114
run-0/checkpoint-716/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8103c551332314f7d2856e2ffd500b0e2ed7cfd34359fe80e353bd1a70196c61
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c43d7365410d526d0532b6e2b2c68c0481daa243937c9c500196b2b3b0ea3fe
3
  size 14308
run-0/checkpoint-716/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da6c7cc1465652020bc3b8e0af7cd84f29ca0ea8b56590e2bebdfe4033a1116d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:865fc7b223eb8eb5157e358aa7f7a1c6fe46575ab236233eb6e0814e0c0cc7f0
3
  size 1064
run-0/checkpoint-716/tokenizer_config.json CHANGED
@@ -52,6 +52,6 @@
52
  "sep_token": "[SEP]",
53
  "strip_accents": null,
54
  "tokenize_chinese_chars": true,
55
- "tokenizer_class": "DistilBertTokenizer",
56
  "unk_token": "[UNK]"
57
  }
 
52
  "sep_token": "[SEP]",
53
  "strip_accents": null,
54
  "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
  "unk_token": "[UNK]"
57
  }
run-0/checkpoint-716/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.6083901524543762,
3
- "best_model_checkpoint": "./results/run-0/checkpoint-358",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 716,
@@ -10,42 +10,42 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6722222222222222,
14
- "eval_loss": 0.6083901524543762,
15
- "eval_runtime": 5.7737,
16
- "eval_samples_per_second": 165.406,
17
- "eval_steps_per_second": 20.784,
18
  "step": 358
19
  },
20
  {
21
  "epoch": 1.4,
22
- "grad_norm": 8.0650634765625,
23
- "learning_rate": 1.0536381136099229e-05,
24
- "loss": 0.6149,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.6920138888888888,
30
- "eval_loss": 0.6155831813812256,
31
- "eval_runtime": 6.4389,
32
- "eval_samples_per_second": 148.317,
33
- "eval_steps_per_second": 18.637,
34
  "step": 716
35
  }
36
  ],
37
  "logging_steps": 500,
38
- "max_steps": 1432,
39
  "num_input_tokens_seen": 0,
40
- "num_train_epochs": 4,
41
  "save_steps": 500,
42
- "total_flos": 529604659826688.0,
43
  "train_batch_size": 8,
44
  "trial_name": null,
45
  "trial_params": {
46
- "learning_rate": 1.6188946123276927e-05,
47
- "num_train_epochs": 4,
48
  "per_device_train_batch_size": 8,
49
- "seed": 20
50
  }
51
  }
 
1
  {
2
+ "best_metric": 0.6109534502029419,
3
+ "best_model_checkpoint": "./results/run-0/checkpoint-716",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 716,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6795138888888889,
14
+ "eval_loss": 0.6224137544631958,
15
+ "eval_runtime": 10.0132,
16
+ "eval_samples_per_second": 95.375,
17
+ "eval_steps_per_second": 11.984,
18
  "step": 358
19
  },
20
  {
21
  "epoch": 1.4,
22
+ "grad_norm": 4.7652764320373535,
23
+ "learning_rate": 3.870766963151035e-06,
24
+ "loss": 0.8365,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.6899305555555555,
30
+ "eval_loss": 0.6109534502029419,
31
+ "eval_runtime": 10.063,
32
+ "eval_samples_per_second": 94.902,
33
+ "eval_steps_per_second": 11.925,
34
  "step": 716
35
  }
36
  ],
37
  "logging_steps": 500,
38
+ "max_steps": 716,
39
  "num_input_tokens_seen": 0,
40
+ "num_train_epochs": 2,
41
  "save_steps": 500,
42
+ "total_flos": 1052088004780032.0,
43
  "train_batch_size": 8,
44
  "trial_name": null,
45
  "trial_params": {
46
+ "learning_rate": 1.2830875674148802e-05,
47
+ "num_train_epochs": 2,
48
  "per_device_train_batch_size": 8,
49
+ "seed": 14
50
  }
51
  }
run-0/checkpoint-716/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:756e0c6b01f2dc28da5639bc1c7bcb62246c7526b8bef0298780aea65e098381
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed7492942e77c037e56d91ad59372c979d1f8ff2bc44d6c8e69149791748c37a
3
  size 4920
runs/Mar09_22-55-18_c8a641adf9a5/events.out.tfevents.1710025487.c8a641adf9a5.2885.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45d0388660bc1986bb67af9a45308da3661bee7283480eda834a542a5105c898
3
- size 5726
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29e296354d8b86c6f6500dfec3998e4a57d68864a8ae4c57fad49f2906043dd7
3
+ size 6614